diff --git a/filters/adaptor.go b/filters/adaptor.go
new file mode 100644
index 000000000..27465afd8
--- /dev/null
+++ b/filters/adaptor.go
@@ -0,0 +1,15 @@
+package filters
+
+// Adaptor specifies the mapping of fieldpaths to a type. For the given field
+// path, the value and whether it is present should be returned. The mapping of
+// the fieldpath to a field is deferred to the adaptor implementation, but
+// should generally follow protobuf field path/mask semantics.
+type Adaptor interface {
+	Field(fieldpath []string) (value string, present bool)
+}
+
+type AdapterFunc func(fieldpath []string) (string, bool)
+
+func (fn AdapterFunc) Field(fieldpath []string) (string, bool) {
+	return fn(fieldpath)
+}
diff --git a/filters/filter.go b/filters/filter.go
new file mode 100644
index 000000000..2c9c3d702
--- /dev/null
+++ b/filters/filter.go
@@ -0,0 +1,155 @@
+// Package filters defines a syntax and parser that can be used for the
+// filtration of items across the containerd API. The core is built on the
+// concept of protobuf field paths, with quoting. Several operators allow the
+// user to flexibly select items based on field presence, equality, inequality
+// and regular expressions. Flexible adaptors support working with any type.
+//
+// The syntax is fairly familiar, if you've used container ecosystem
+// projects. At the core, we base it on the concept of protobuf field
+// paths, augmenting with the ability to quote portions of the field path
+// to match arbitrary labels. These "selectors" come in the following
+// syntax:
+//
+// ```
+// <fieldpath>[<operator><value>]
+// ```
+//
+// A basic example is as follows:
+//
+// ```
+// name==foo
+// ```
+//
+// This would match all objects that have a field `name` with the value
+// `foo`. If we only want to test if the field is present, we can omit the
+// operator. This is most useful for matching labels in containerd. The
+// following will match objects that have the field "labels" and have the
+// label "foo" defined:
+//
+// ```
+// labels.foo
+// ```
+//
+// We also allow for quoting of parts of the field path to allow matching
+// of arbitrary items:
+//
+// ```
+// labels."very complex label"==something
+// ```
+//
+// We also define `!=` and `~=` as operators. The `!=` will match all
+// objects that don't match the value for a field and `~=` will compile the
+// target value as a regular expression and match the field value against that.
+//
+// Selectors can be combined using a comma, such that the resulting
+// selector will require all selectors are matched for the object to match.
+// The following example will match objects that are named `foo` and have
+// the label `bar`:
+//
+// ```
+// name==foo,labels.bar
+// ```
+//
+package filters

+import (
+	"regexp"
+
+	"github.com/containerd/containerd/log"
+)
+
+type Filter interface {
+	Match(adaptor Adaptor) bool
+}
+
+type FilterFunc func(Adaptor) bool
+
+func (fn FilterFunc) Match(adaptor Adaptor) bool {
+	return fn(adaptor)
+}
+
+var Always FilterFunc = func(adaptor Adaptor) bool {
+	return true
+}
+
+type Any []Filter
+
+func (m Any) Match(adaptor Adaptor) bool {
+	for _, m := range m {
+		if m.Match(adaptor) {
+			return true
+		}
+	}
+
+	return false
+}
+
+type All []Filter
+
+func (m All) Match(adaptor Adaptor) bool {
+	for _, m := range m {
+		if !m.Match(adaptor) {
+			return false
+		}
+	}
+
+	return true
+}
+
+type operator int
+
+const (
+	operatorPresent = iota
+	operatorEqual
+	operatorNotEqual
+	operatorMatches
+)
+
+func (op operator) String() string {
+	switch op {
+	case operatorPresent:
+		return "?"
+	case operatorEqual:
+		return "=="
+	case operatorNotEqual:
+		return "!="
+	case operatorMatches:
+		return "~="
+	}
+
+	return "unknown"
+}
+
+type selector struct {
+	fieldpath []string
+	operator  operator
+	value     string
+	re        *regexp.Regexp
+}
+
+func (m selector) Match(adaptor Adaptor) bool {
+	value, present := adaptor.Field(m.fieldpath)
+
+	switch m.operator {
+	case operatorPresent:
+		return present
+	case operatorEqual:
+		return present && value == m.value
+	case operatorNotEqual:
+		return value != m.value
+	case operatorMatches:
+		if m.re == nil {
+			r, err := regexp.Compile(m.value)
+			if err != nil {
+				log.L.Errorf("error compiling regexp %q", m.value)
+				return false
+			}
+
+			m.re = r
+		}
+
+		return m.re.MatchString(value)
+	default:
+		return false
+	}
+}
diff --git a/filters/filter_test.go b/filters/filter_test.go
new file mode 100644
index 000000000..ec6cb6df3
--- /dev/null
+++ b/filters/filter_test.go
@@ -0,0 +1,246 @@
+package filters
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+)
+
+func TestFilters(t *testing.T) {
+	type cEntry struct {
+		Name   string
+		Other  string
+		Labels map[string]string
+	}
+
+	corpusS := []cEntry{
+		{
+			Name: "foo",
+			Labels: map[string]string{
+				"foo": "true",
+			},
+		},
+		{
+			Name: "bar",
+		},
+		{
+			Name: "foo",
+			Labels: map[string]string{
+				"foo":                "present",
+				"more complex label": "present",
+			},
+		},
+		{
+			Name: "bar",
+			Labels: map[string]string{
+				"bar": "true",
+			},
+		},
+		{
+			Name: "fooer",
+			Labels: map[string]string{
+				"more complex label with \\ and \"": "present",
+			},
+		},
+		{
+			Name: "fooer",
+			Labels: map[string]string{
+				"more complex label with \\ and \".post": "present",
+			},
+		},
+		{
+			Name:  "baz",
+			Other: "too complex, yo",
+		},
+		{
+			Name:  "bazo",
+			Other: "abc",
+		},
+	}
+
+	var corpus []interface{}
+	for _, entry := range corpusS {
+		corpus = append(corpus, entry)
+	}
+
+	// adapt shows an example of how to build an adaptor function for a type.
+	adapt := func(o interface{}) Adaptor {
+		obj := o.(cEntry)
+		return AdapterFunc(func(fieldpath []string) (string, bool) {
+			switch fieldpath[0] {
+			case "name":
+				return obj.Name, len(obj.Name) > 0
+			case "other":
+				return obj.Other, len(obj.Other) > 0
+			case "labels":
+				value, ok := obj.Labels[strings.Join(fieldpath[1:], ".")]
+				return value, ok
+			}
+
+			return "", false
+		})
+	}
+
+	for _, testcase := range []struct {
+		name      string
+		input     string
+		expected  []interface{}
+		errString string
+	}{
+		{
+			name:     "Empty",
+			input:    "",
+			expected: corpus,
+		},
+		{
+			name:     "Present",
+			input:    "name",
+			expected: corpus,
+		},
+		{
+			name:  "LabelPresent",
+			input: "labels.foo",
+			expected: []interface{}{
+				corpus[0],
+				corpus[2],
+			},
+		},
+		{
+			name:  "LabelValue",
+			input: "labels.foo==true",
+			expected: []interface{}{
+				corpus[0],
+			},
+		},
+		{
+			name:  "Name",
+			input: "name==bar",
+			expected: []interface{}{
+				corpus[1],
+				corpus[3],
+			},
+		},
+		{
+			name:  "NameNotEqual",
+			input: "name!=bar",
+			expected: []interface{}{
+				corpus[0],
+				corpus[2],
+				corpus[4],
+				corpus[5],
+				corpus[6],
+				corpus[7],
+			},
+		},
+		{
+			name:  "NameAndLabelPresent",
+			input: "name==bar,labels.bar",
+			expected: []interface{}{
+				corpus[3],
+			},
+		},
+		{
+			name:  "QuotedValue",
+			input: "other==\"too complex, yo\"",
+			expected: []interface{}{
+				corpus[6],
+			},
+		},
+		{
+			name:  "RegexpValue",
+			input: "other~=[abc]+,name!=foo",
+			expected: []interface{}{
+				corpus[6],
+				corpus[7],
+			},
+		},
+		{
+			name:  "NameAndLabelValue",
+			input: "name==bar,labels.bar==true",
+			expected: []interface{}{
+				corpus[3],
+			},
+		},
+		{
+			name:  "NameAndLabelValueNoMatch",
+			input: "name==bar,labels.bar==wrong",
+		},
+		{
+			name:  "LabelQuotedFieldPathPresent",
+			input: `name==foo,labels."more complex label"`,
+			expected: []interface{}{
+				corpus[2],
+			},
+		},
+		{
+			name:  "LabelQuotedFieldPathPresentWithQuoted",
+			input: `labels."more complex label with \\ and \""==present`,
+			expected: []interface{}{
+				corpus[4],
+			},
+		},
+		{
+			name:  "LabelQuotedFieldPathPresentWithQuotedEmbed",
+			input: `labels."more complex label with \\ and \"".post==present`,
+			expected: []interface{}{
+				corpus[5],
+			},
+		},
+		{
+			name:      "LabelQuotedFieldPathPresentWithQuotedEmbedInvalid",
+			input:     `labels.?"more complex label with \\ and \"".post==present`,
+			errString: `filters: parse error: [labels. >|?|< "more complex label with \\ and \"".post==present]: expected field or quoted`,
+		},
+		{
+			name:      "TrailingComma",
+			input:     "name==foo,",
+			errString: `filters: parse error: [name==foo,]: expected field or quoted`,
+		},
+		{
+			name:      "TrailingFieldSeparator",
+			input:     "labels.",
+			errString: `filters: parse error: [labels.]: expected field or quoted`,
+		},
+		{
+			name:      "MissingValue",
+			input:     "image~=,id?=?fbaq",
+			errString: `filters: parse error: [image~= >|,|< id?=?fbaq]: expected value or quoted`,
+		},
+	} {
+		t.Run(testcase.name, func(t *testing.T) {
+			t.Logf("testcase: %q", testcase.input)
+			filter, err := Parse(testcase.input)
+			if testcase.errString != "" {
+				if err == nil {
+					t.Fatalf("expected an error, but received nil")
+				}
+				if err.Error() != testcase.errString {
+					t.Fatalf("error %v != %v", err, testcase.errString)
+				}
+
+				return
+			} else {
+				if err != nil {
+					t.Fatal(err)
+				}
+			}
+
+			if filter == nil {
+				t.Fatal("filter should not be nil")
+			}
+
+			t.Log("filter", filter)
+			var results []interface{}
+			for _, item := range corpus {
+				adaptor := adapt(item)
+				if filter.Match(adaptor) {
+					results = append(results, item)
+				}
+			}
+
+			if !reflect.DeepEqual(results, testcase.expected) {
+				t.Fatalf("%q: %#v != %#v", testcase.input, results, testcase.expected)
+			}
+		})
+	}
+}
diff --git a/filters/parser.go b/filters/parser.go
new file mode 100644
index 000000000..abd2a10a1
--- /dev/null
+++ b/filters/parser.go
@@ -0,0 +1,239 @@
+package filters
+
+import (
+	"fmt"
+	"io"
+	"strconv"
+
+	"github.com/pkg/errors"
+)
+
+/*
+Parse the strings into a filter that may be used with an adaptor.
+
+The filter is made up of zero or more selectors.
+
+The format is a comma separated list of expressions, in the form of
+`<fieldpath><op><value>`, known as selectors. All selectors must match the
+target object for the filter to be true.
+
+We define the operators "==" for equality, "!=" for not equal and "~=" for a
+regular expression. If the operator and value are not present, the matcher will
+test for the presence of a value, as defined by the target object.
+
+The formal grammar is as follows:
+
+selectors := selector ("," selector)*
+selector := fieldpath (operator value)
+fieldpath := field ('.' field)*
+field := quoted | [A-Za-z] [A-Za-z0-9_]+
+operator := "==" | "!=" | "~="
+value := quoted | [^\s,]+
+quoted := <go string syntax>
+
+*/
+func Parse(s string) (Filter, error) {
+	// special case empty to match all
+	if s == "" {
+		return Always, nil
+	}
+
+	p := parser{input: s}
+	return p.parse()
+}
+
+type parser struct {
+	input   string
+	scanner scanner
+}
+
+func (p *parser) parse() (Filter, error) {
+	p.scanner.init(p.input)
+
+	ss, err := p.selectors()
+	if err != nil {
+		return nil, errors.Wrap(err, "filters")
+	}
+
+	return ss, nil
+}
+
+func (p *parser) selectors() (Filter, error) {
+	s, err := p.selector()
+	if err != nil {
+		return nil, err
+	}
+
+	ss := All{s}
+
+loop:
+	for {
+		tok := p.scanner.peek()
+		switch tok {
+		case ',':
+			pos, tok, _ := p.scanner.scan()
+			if tok != tokenSelectorSeparator {
+				return nil, p.mkerr(pos, "expected a separator")
+			}
+
+			s, err := p.selector()
+			if err != nil {
+				return nil, err
+			}
+
+			ss = append(ss, s)
+		case tokenEOF:
+			break loop
+		default:
+			panic("unconsumed input")
+		}
+	}
+
+	return ss, nil
+}
+
+func (p *parser) selector() (selector, error) {
+	fieldpath, err := p.fieldpath()
+	if err != nil {
+		return selector{}, err
+	}
+
+	switch p.scanner.peek() {
+	case tokenSelectorSeparator, tokenEOF:
+		return selector{
+			fieldpath: fieldpath,
+			operator:  operatorPresent,
+		}, nil
+	}
+
+	op, err := p.operator()
+	if err != nil {
+		return selector{}, err
+	}
+
+	value, err := p.value()
+	if err != nil {
+		if err == io.EOF {
+			return selector{}, io.ErrUnexpectedEOF
+		}
+		return selector{}, err
+	}
+
+	return selector{
+		fieldpath: fieldpath,
+		value:     value,
+		operator:  op,
+	}, nil
+}
+
+func (p *parser) fieldpath() ([]string, error) {
+	f, err := p.field()
+	if err != nil {
+		return nil, err
+	}
+
+	fs := []string{f}
+loop:
+	for {
+		tok := p.scanner.peek() // lookahead to consume field separator
+
+		switch tok {
+		case '.':
+			pos, tok, _ := p.scanner.scan() // consume separator
+			if tok != tokenFieldSeparator {
+				return nil, p.mkerr(pos, "expected a field separator (`.`)")
+			}
+
+			f, err := p.field()
+			if err != nil {
+				return nil, err
+			}
+
+			fs = append(fs, f)
+		default:
+			// let the layer above handle the other bad cases.
+			break loop
+		}
+	}
+
+	return fs, nil
+}
+
+func (p *parser) field() (string, error) {
+	pos, tok, s := p.scanner.scan()
+	switch tok {
+	case tokenField:
+		return s, nil
+	case tokenQuoted:
+		return p.unquote(pos, s)
+	}
+
+	return "", p.mkerr(pos, "expected field or quoted")
+}
+
+func (p *parser) operator() (operator, error) {
+	pos, tok, s := p.scanner.scan()
+	switch tok {
+	case tokenOperator:
+		switch s {
+		case "==":
+			return operatorEqual, nil
+		case "!=":
+			return operatorNotEqual, nil
+		case "~=":
+			return operatorMatches, nil
+		default:
+			return 0, p.mkerr(pos, "unsupported operator %q", s)
+		}
+	}
+
+	return 0, p.mkerr(pos, `expected an operator ("=="|"!="|"~=")`)
+}
+
+func (p *parser) value() (string, error) {
+	pos, tok, s := p.scanner.scan()
+
+	switch tok {
+	case tokenValue, tokenField:
+		return s, nil
+	case tokenQuoted:
+		return p.unquote(pos, s)
+	}
+
+	return "", p.mkerr(pos, "expected value or quoted")
+}
+
+func (p *parser) unquote(pos int, s string) (string, error) {
+	uq, err := strconv.Unquote(s)
+	if err != nil {
+		return "", p.mkerr(pos, "unquoting failed: %v", err)
+	}
+
+	return uq, nil
+}
+
+type parseError struct {
+	input string
+	pos   int
+	msg   string
+}
+
+func (pe parseError) Error() string {
+	if pe.pos < len(pe.input) {
+		before := pe.input[:pe.pos]
+		location := pe.input[pe.pos : pe.pos+1] // need to handle end
+		after := pe.input[pe.pos+1:]
+
+		return fmt.Sprintf("[%s >|%s|< %s]: %v", before, location, after, pe.msg)
+	}
+
+	return fmt.Sprintf("[%s]: %v", pe.input, pe.msg)
+}
+
+func (p *parser) mkerr(pos int, format string, args ...interface{}) error {
+	return errors.Wrap(parseError{
+		input: p.input,
+		pos:   pos,
+		msg:   fmt.Sprintf(format, args...),
+	}, "parse error")
+}
diff --git a/filters/scanner.go b/filters/scanner.go
new file mode 100644
index 000000000..69734d7a5
--- /dev/null
+++ b/filters/scanner.go
@@ -0,0 +1,279 @@
+package filters
+
+import (
+	"fmt"
+	"unicode"
+	"unicode/utf8"
+)
+
+const (
+	tokenEOF = -(iota + 1)
+	tokenQuoted
+	tokenValue
+	tokenField
+	tokenFieldSeparator
+	tokenOperator
+	tokenSelectorSeparator
+	tokenIllegal
+)
+
+type token rune
+
+func (t token) String() string {
+	switch t {
+	case tokenEOF:
+		return "EOF"
+	case tokenQuoted:
+		return "Quoted"
+	case tokenValue:
+		return "Value"
+	case tokenField:
+		return "Field"
+	case tokenOperator:
+		return "Operator"
+	case tokenFieldSeparator:
+		return "FieldSeparator"
+	case tokenSelectorSeparator:
+		return "SelectorSeparator"
+	case tokenIllegal:
+		return "Illegal"
+	}
+
+	return string(t)
+}
+
+func (t token) GoString() string {
+	return "token" + t.String()
+}
+
+type scanner struct {
+	input string
+	pos   int
+	ppos  int // bounds the current rune in the string
+	value bool
+}
+
+func (s *scanner) init(input string) {
+	s.input = input
+	s.pos = 0
+	s.ppos = 0
+}
+
+func (s *scanner) next() rune {
+	if s.pos >= len(s.input) {
+		return tokenEOF
+	}
+	s.pos = s.ppos
+
+	r, w := utf8.DecodeRuneInString(s.input[s.ppos:])
+	s.ppos += w
+	if r == utf8.RuneError {
+		if w > 0 {
+			return tokenIllegal
+		} else {
+			return tokenEOF
+		}
+	}
+
+	if r == 0 {
+		return tokenIllegal
+	}
+
+	return r
+}
+
+func (s *scanner) peek() rune {
+	pos := s.pos
+	ppos := s.ppos
+	ch := s.next()
+	s.pos = pos
+	s.ppos = ppos
+	return ch
+}
+
+func (s *scanner) scan() (int, token, string) {
+	var (
+		ch  = s.next()
+		pos = s.pos
+	)
+
+chomp:
+	switch {
+	case ch == tokenEOF:
+	case ch == tokenIllegal:
+	case isQuoteRune(ch):
+		s.scanString(ch)
+		return pos, tokenQuoted, s.input[pos:s.ppos]
+	case ch == ',':
+		return pos, tokenSelectorSeparator, s.input[pos:s.ppos]
+	case ch == '.':
+		return pos, tokenFieldSeparator, s.input[pos:s.ppos]
+	case isOperatorRune(ch):
+		s.scanOperator()
+		s.value = true
+		return pos, tokenOperator, s.input[pos:s.ppos]
+	case unicode.IsSpace(ch):
+		// chomp
+		ch = s.next()
+		pos = s.pos
+		goto chomp
+	case s.value:
+		s.scanValue()
+
+		// TODO(stevvooe): We can get rid of the value flag by having a
+		// scanUnquoted that accumulates characters. If it is a legal field,
+		// then we return a field token. The parser can then treat fields as
+		// values. This will allow the default case here to just scan value or
+		// field.
+		s.value = false
+		return pos, tokenValue, s.input[pos:s.ppos]
+	case isFieldRune(ch):
+		s.scanField()
+		return pos, tokenField, s.input[pos:s.ppos]
+	}
+
+	return s.pos, token(ch), ""
+}
+
+func (s *scanner) scanField() {
+	for {
+		ch := s.peek()
+		if !isFieldRune(ch) {
+			break
+		}
+		s.next()
+	}
+}
+
+func (s *scanner) scanOperator() {
+	for {
+		ch := s.peek()
+		switch ch {
+		case '=', '!', '~':
+			s.next()
+		default:
+			return
+		}
+	}
+}
+
+func (s *scanner) scanValue() {
+	for {
+		ch := s.peek()
+		if !isValueRune(ch) {
+			break
+		}
+		s.next()
+	}
+}
+
+func (s *scanner) scanString(quote rune) {
+	ch := s.next() // read character after quote
+	for ch != quote {
+		if ch == '\n' || ch < 0 {
+			s.error("literal not terminated")
+			return
+		}
+		if ch == '\\' {
+			ch = s.scanEscape(quote)
+		} else {
+			ch = s.next()
+		}
+	}
+	return
+}
+
+func (s *scanner) scanEscape(quote rune) rune {
+	ch := s.next() // read character after '\'
+	switch ch {
+	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
+		// nothing to do
+		ch = s.next()
+	case '0', '1', '2', '3', '4', '5', '6', '7':
+		ch = s.scanDigits(ch, 8, 3)
+	case 'x':
+		ch = s.scanDigits(s.next(), 16, 2)
+	case 'u':
+		ch = s.scanDigits(s.next(), 16, 4)
+	case 'U':
+		ch = s.scanDigits(s.next(), 16, 8)
+	default:
+		s.error("illegal char escape")
+	}
+	return ch
+}
+
+func (s *scanner) scanDigits(ch rune, base, n int) rune {
+	for n > 0 && digitVal(ch) < base {
+		ch = s.next()
+		n--
+	}
+	if n > 0 {
+		s.error("illegal char escape")
+	}
+	return ch
+}
+
+func (s *scanner) error(msg string) {
+	fmt.Println("error fixme", msg)
+}
+
+func digitVal(ch rune) int {
+	switch {
+	case '0' <= ch && ch <= '9':
+		return int(ch - '0')
+	case 'a' <= ch && ch <= 'f':
+		return int(ch - 'a' + 10)
+	case 'A' <= ch && ch <= 'F':
+		return int(ch - 'A' + 10)
+	}
+	return 16 // larger than any legal digit val
+}
+
+func isFieldRune(r rune) bool {
+	return (r == '_' || isAlphaRune(r) || isDigitRune(r))
+}
+
+func isAlphaRune(r rune) bool {
+	return r >= 'A' && r <= 'Z' || r >= 'a' && r <= 'z'
+}
+
+func isDigitRune(r rune) bool {
+	return r >= '0' && r <= '9'
+}
+
+func isOperatorRune(r rune) bool {
+	switch r {
+	case '=', '!', '~':
+		return true
+	}
+
+	return false
+}
+
+func isQuoteRune(r rune) bool {
+	switch r {
+	case '"': // maybe add single quoting?
+		return true
+	}
+
+	return false
+}
+
+func isSeparatorRune(r rune) bool {
+	switch r {
+	case ',', '.':
+		return true
+	}
+
+	return false
+}
+
+func isValueRune(r rune) bool {
+	return r != ',' && !unicode.IsSpace(r) &&
+		(unicode.IsLetter(r) ||
+			unicode.IsDigit(r) ||
+			unicode.IsNumber(r) ||
+			unicode.IsGraphic(r) ||
+			unicode.IsPunct(r))
+}
diff --git a/filters/scanner_test.go b/filters/scanner_test.go
new file mode 100644
index 000000000..43ec707fc
--- /dev/null
+++ b/filters/scanner_test.go
@@ -0,0 +1,236 @@
+package filters
+
+import (
+	"fmt"
+	"strconv"
+	"testing"
+)
+
+type tokenResult struct {
+	pos   int
+	token token
+	text  string
+}
+
+func (tr tokenResult) String() string {
+	return fmt.Sprintf("{pos: %v, token: %v, text: %q}", tr.pos, tr.token, tr.text)
+}
+
+func TestScanner(t *testing.T) {
+
+	for _, testcase := range []struct {
+		name     string
+		input    string
+		expected []tokenResult
+	}{
+		{
+			name:  "Field",
+			input: "name",
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "name"},
+				{pos: 4, token: tokenEOF},
+			},
+		},
+		{
+			name:  "SelectorsWithOperators",
+			input: "name==value,foo!=bar",
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "name"},
+				{pos: 4, token: tokenOperator, text: "=="},
+				{pos: 6, token: tokenValue, text: "value"},
+				{pos: 11, token: tokenSelectorSeparator, text: ","},
+				{pos: 12, token: tokenField, text: "foo"},
+				{pos: 15, token: tokenOperator, text: "!="},
+				{pos: 17, token: tokenValue, text: "bar"},
+				{pos: 20, token: tokenEOF},
+			},
+		},
+		{
+			name:  "SelectorsWithFieldPaths",
+			input: "name==value,labels.foo=value,other.bar~=match",
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "name"},
+				{pos: 4, token: tokenOperator, text: "=="},
+				{pos: 6, token: tokenValue, text: "value"},
+				{pos: 11, token: tokenSelectorSeparator, text: ","},
+				{pos: 12, token: tokenField, text: "labels"},
+				{pos: 18, token: tokenFieldSeparator, text: "."},
+				{pos: 19, token: tokenField, text: "foo"},
+				{pos: 22, token: tokenOperator, text: "="},
+				{pos: 23, token: tokenValue, text: "value"},
+				{pos: 28, token: tokenSelectorSeparator, text: ","},
+				{pos: 29, token: tokenField, text: "other"},
+				{pos: 34, token: tokenFieldSeparator, text: "."},
+				{pos: 35, token: tokenField, text: "bar"},
+				{pos: 38, token: tokenOperator, text: "~="},
+				{pos: 40, token: tokenValue, text: "match"},
+				{pos: 45, token: tokenEOF},
+			},
+		},
+		{
+			name:  "RegexpValue",
+			input: "name~=[abc]+,foo=test",
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "name"},
+				{pos: 4, token: tokenOperator, text: "~="},
+				{pos: 6, token: tokenValue, text: "[abc]+"},
+				{pos: 12, token: tokenSelectorSeparator, text: ","},
+				{pos: 13, token: tokenField, text: "foo"},
+				{pos: 16, token: tokenOperator, text: "="},
+				{pos: 17, token: tokenValue, text: "test"},
+				{pos: 21, token: tokenEOF},
+			},
+		},
+		{
+			name:  "RegexpEscapedValue",
+			input: `name~=[abc]\+,foo=test`,
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "name"},
+				{pos: 4, token: tokenOperator, text: "~="},
+				{pos: 6, token: tokenValue, text: "[abc]\\+"},
+				{pos: 13, token: tokenSelectorSeparator, text: ","},
+				{pos: 14, token: tokenField, text: "foo"},
+				{pos: 17, token: tokenOperator, text: "="},
+				{pos: 18, token: tokenValue, text: "test"},
+				{pos: 22, token: tokenEOF},
+			},
+		},
+		{
+			name:  "Cowsay",
+			input: "name~=牛,labels.moo=true",
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "name"},
+				{pos: 4, token: tokenOperator, text: "~="},
+				{pos: 6, token: tokenValue, text: "牛"},
+				{pos: 9, token: tokenSelectorSeparator, text: ","},
+				{pos: 10, token: tokenField, text: "labels"},
+				{pos: 16, token: tokenFieldSeparator, text: "."},
+				{pos: 17, token: tokenField, text: "moo"},
+				{pos: 20, token: tokenOperator, text: "="},
+				{pos: 21, token: tokenValue, text: "true"},
+				{pos: 25, token: tokenEOF},
+			},
+		},
+		{
+			name:  "Escapes",
+			input: `name~="asdf\n\tfooo"`,
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "name"},
+				{pos: 4, token: tokenOperator, text: "~="},
+				{pos: 6, token: tokenQuoted, text: "\"asdf\\n\\tfooo\""},
+				{pos: 20, token: tokenEOF},
+			},
+		},
+		{
+			name:  "NullInput",
+			input: "foo\x00bar",
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "foo"},
+				{pos: 3, token: tokenIllegal},
+				{pos: 4, token: tokenField, text: "bar"},
+				{pos: 7, token: tokenEOF},
+			},
+		},
+		{
+			name:  "SpacesChomped",
+			input: "foo = bar ",
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "foo"},
+				{pos: 4, token: tokenOperator, text: "="},
+				{pos: 6, token: tokenValue, text: "bar"},
+				{pos: 13, token: tokenEOF},
+			},
+		},
+		{
+			name:  "PartialInput",
+			input: "interrupted=",
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "interrupted"},
+				{pos: 11, token: tokenOperator, text: "="},
+				{pos: 12, token: tokenEOF},
+			},
+		},
+		{
+			name:  "DoubleValue",
+			input: "doublevalue=value value",
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "doublevalue"},
+				{pos: 11, token: tokenOperator, text: "="},
+				{pos: 12, token: tokenValue, text: "value"},
+				{pos: 18, token: tokenField, text: "value"},
+				{pos: 23, token: tokenEOF},
+			},
+		},
+		{
+			name:  "LeadingWithQuoted",
+			input: `"leading quote".postquote==value`,
+			expected: []tokenResult{
+				{pos: 0, token: tokenQuoted, text: "\"leading quote\""},
+				{pos: 15, token: tokenFieldSeparator, text: "."},
+				{pos: 16, token: tokenField, text: "postquote"},
+				{pos: 25, token: tokenOperator, text: "=="},
+				{pos: 27, token: tokenValue, text: "value"},
+				{pos: 32, token: tokenEOF},
+			},
+		},
+		{
+			name:  "MissingValue",
+			input: "input==,id?=ff",
+			expected: []tokenResult{
+				{pos: 0, token: tokenField, text: "input"},
+				{pos: 5, token: tokenOperator, text: "=="},
+				{pos: 7, token: tokenSelectorSeparator, text: ","},
+				{pos: 8, token: tokenValue, text: "id?=ff"},
+				{pos: 14, token: tokenEOF},
+			},
+		},
+	} {
+		t.Run(testcase.name, func(t *testing.T) {
+			var sc scanner
+			sc.init(testcase.input)
+			t.Logf("scan %q", testcase.input)
+
+			// If you leave the expected empty, the test case will just print
+			// out the token stream, which you can paste into the testcase when
+			// adding new cases.
+			if len(testcase.expected) == 0 {
+				fmt.Println("Name", testcase.name)
+			}
+
+			for i := 0; ; i++ {
+				pos, tok, s := sc.scan()
+				t.Log("token", pos, tok, strconv.Quote(s))
+				if len(testcase.expected) == 0 {
+					if len(s) > 0 {
+						fmt.Printf("{pos: %v, token: %#v, text: %q},\n", pos, tok, s)
+					} else {
+						fmt.Printf("{pos: %v, token: %#v},\n", pos, tok)
+					}
+				} else {
+					tokv := tokenResult{pos: pos, token: tok, text: s}
+					if i >= len(testcase.expected) {
+						t.Fatalf("too many tokens parsed")
+					}
+
+					if tokv != testcase.expected[i] {
+						t.Fatalf("token unexpected: %v != %v", tokv, testcase.expected[i])
+					}
+				}
+
+				if tok == tokenEOF {
+					break
+				}
+			}
+
+			// make sure we've eof'd
+			_, tok, _ := sc.scan()
+			if tok != tokenEOF {
+				t.Fatal("must consume all input")
+			}
+
+			if len(testcase.expected) == 0 {
+				t.Fatal("must define expected tokens")
+			}
+		})
+	}
+}