Merge pull request #995 from stevvooe/filter-syntax
filters: add package for filter syntax
This commit is contained in:
commit
ebf4620206
15
filters/adaptor.go
Normal file
15
filters/adaptor.go
Normal file
@ -0,0 +1,15 @@
|
||||
package filters
|
||||
|
||||
// Adaptor specifies the mapping of fieldpaths to a type. For the given field
// path, the value and whether it is present should be returned. The mapping of
// the fieldpath to a field is deferred to the adaptor implementation, but
// should generally follow protobuf field path/mask semantics.
type Adaptor interface {
	Field(fieldpath []string) (value string, present bool)
}

// AdapterFunc adapts a plain function to the Adaptor interface, allowing
// implementation-specific resolution of fieldpaths.
type AdapterFunc func(fieldpath []string) (string, bool)

// Field invokes fn for the given fieldpath, satisfying Adaptor.
func (fn AdapterFunc) Field(fieldpath []string) (string, bool) {
	return fn(fieldpath)
}
|
155
filters/filter.go
Normal file
155
filters/filter.go
Normal file
@ -0,0 +1,155 @@
|
||||
// Package filters defines a syntax and parser that can be used for the
|
||||
// filtration of items across the containerd API. The core is built on the
|
||||
// concept of protobuf field paths, with quoting. Several operators allow the
|
||||
// user to flexibly select items based on field presence, equality, inequality
|
||||
// and regular expressions. Flexible adaptors support working with any type.
|
||||
//
|
||||
// The syntax is fairly familiar, if you've used container ecosystem
|
||||
// projects. At the core, we base it on the concept of protobuf field
|
||||
// paths, augmenting with the ability to quote portions of the field path
|
||||
// to match arbitrary labels. These "selectors" come in the following
|
||||
// syntax:
|
||||
//
|
||||
// ```
|
||||
// <fieldpath>[<operator><value>]
|
||||
// ```
|
||||
//
|
||||
// A basic example is as follows:
|
||||
//
|
||||
// ```
|
||||
// name==foo
|
||||
// ```
|
||||
//
|
||||
// This would match all objects that have a field `name` with the value
|
||||
// `foo`. If we only want to test if the field is present, we can omit the
|
||||
// operator. This is most useful for matching labels in containerd. The
|
||||
// following will match objects that have the field "labels" and have the
|
||||
// label "foo" defined:
|
||||
//
|
||||
// ```
|
||||
// labels.foo
|
||||
// ```
|
||||
//
|
||||
// We also allow for quoting of parts of the field path to allow matching
|
||||
// of arbitrary items:
|
||||
//
|
||||
// ```
|
||||
// labels."very complex label"==something
|
||||
// ```
|
||||
//
|
||||
// We also define `!=` and `~=` as operators. The `!=` will match all
|
||||
// objects that don't match the value for a field and `~=` will compile the
|
||||
// target value as a regular expression and match the field value against that.
|
||||
//
|
||||
// Selectors can be combined using a comma, such that the resulting
|
||||
// selector will require all selectors are matched for the object to match.
|
||||
// The following example will match objects that are named `foo` and have
|
||||
// the label `bar`:
|
||||
//
|
||||
// ```
|
||||
// name==foo,labels.bar
|
||||
// ```
|
||||
//
|
||||
package filters
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/containerd/containerd/log"
|
||||
)
|
||||
|
||||
// Filter matches objects exposed through an Adaptor.
type Filter interface {
	Match(adaptor Adaptor) bool
}

// FilterFunc adapts a plain function to the Filter interface.
type FilterFunc func(Adaptor) bool

// Match invokes fn against the adaptor, satisfying Filter.
func (fn FilterFunc) Match(adaptor Adaptor) bool {
	return fn(adaptor)
}

// Always is a filter that matches every object; Parse returns it for the
// empty input.
var Always FilterFunc = func(adaptor Adaptor) bool {
	return true
}
|
||||
|
||||
type Any []Filter
|
||||
|
||||
func (m Any) Match(adaptor Adaptor) bool {
|
||||
for _, m := range m {
|
||||
if m.Match(adaptor) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
type All []Filter
|
||||
|
||||
func (m All) Match(adaptor Adaptor) bool {
|
||||
for _, m := range m {
|
||||
if !m.Match(adaptor) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// operator enumerates the comparison operations a selector can perform.
type operator int

const (
	// operatorPresent matches when the field exists, regardless of value.
	operatorPresent operator = iota
	// operatorEqual matches when the field value equals the target exactly.
	operatorEqual
	// operatorNotEqual matches when the field value differs from the target.
	operatorNotEqual
	// operatorMatches treats the target as a regular expression and matches
	// the field value against it.
	operatorMatches
)

// String returns the concrete filter syntax for the operator ("?" for
// presence), or "unknown" for an out-of-range value.
func (op operator) String() string {
	switch op {
	case operatorPresent:
		return "?"
	case operatorEqual:
		return "=="
	case operatorNotEqual:
		return "!="
	case operatorMatches:
		return "~="
	}

	return "unknown"
}
|
||||
|
||||
type selector struct {
|
||||
fieldpath []string
|
||||
operator operator
|
||||
value string
|
||||
re *regexp.Regexp
|
||||
}
|
||||
|
||||
func (m selector) Match(adaptor Adaptor) bool {
|
||||
value, present := adaptor.Field(m.fieldpath)
|
||||
|
||||
switch m.operator {
|
||||
case operatorPresent:
|
||||
return present
|
||||
case operatorEqual:
|
||||
return present && value == m.value
|
||||
case operatorNotEqual:
|
||||
return value != m.value
|
||||
case operatorMatches:
|
||||
if m.re == nil {
|
||||
r, err := regexp.Compile(m.value)
|
||||
if err != nil {
|
||||
log.L.Errorf("error compiling regexp %q", m.value)
|
||||
return false
|
||||
}
|
||||
|
||||
m.re = r
|
||||
}
|
||||
|
||||
return m.re.MatchString(value)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
246
filters/filter_test.go
Normal file
246
filters/filter_test.go
Normal file
@ -0,0 +1,246 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestFilters parses each filter expression and runs it over a small corpus
// of cEntry values via an example adaptor, checking both the matched subset
// and the exact error strings for invalid inputs.
func TestFilters(t *testing.T) {
	// cEntry is the concrete type the adaptor below maps into fieldpaths.
	type cEntry struct {
		Name   string
		Other  string
		Labels map[string]string
	}

	corpusS := []cEntry{
		{
			Name: "foo",
			Labels: map[string]string{
				"foo": "true",
			},
		},
		{
			Name: "bar",
		},
		{
			Name: "foo",
			Labels: map[string]string{
				"foo":                "present",
				"more complex label": "present",
			},
		},
		{
			Name: "bar",
			Labels: map[string]string{
				"bar": "true",
			},
		},
		{
			Name: "fooer",
			Labels: map[string]string{
				"more complex label with \\ and \"": "present",
			},
		},
		{
			Name: "fooer",
			Labels: map[string]string{
				"more complex label with \\ and \".post": "present",
			},
		},
		{
			Name:  "baz",
			Other: "too complex, yo",
		},
		{
			Name:  "bazo",
			Other: "abc",
		},
	}

	// Matching operates over []interface{} to mirror how callers adapt
	// arbitrary objects.
	var corpus []interface{}
	for _, entry := range corpusS {
		corpus = append(corpus, entry)
	}

	// adapt shows an example of how to build an adaptor function for a type.
	adapt := func(o interface{}) Adaptor {
		obj := o.(cEntry)
		return AdapterFunc(func(fieldpath []string) (string, bool) {
			switch fieldpath[0] {
			case "name":
				return obj.Name, len(obj.Name) > 0
			case "other":
				return obj.Other, len(obj.Other) > 0
			case "labels":
				// Remaining fieldpath components address a single label key;
				// quoted components may themselves contain dots.
				value, ok := obj.Labels[strings.Join(fieldpath[1:], ".")]
				return value, ok
			}

			return "", false
		})
	}

	for _, testcase := range []struct {
		name      string
		input     string
		expected  []interface{}
		errString string
	}{
		{
			name:     "Empty",
			input:    "",
			expected: corpus,
		},
		{
			name:     "Present",
			input:    "name",
			expected: corpus,
		},
		{
			name:  "LabelPresent",
			input: "labels.foo",
			expected: []interface{}{
				corpus[0],
				corpus[2],
			},
		},
		{
			name:  "LabelValue",
			input: "labels.foo==true",
			expected: []interface{}{
				corpus[0],
			},
		},
		{
			name:  "Name",
			input: "name==bar",
			expected: []interface{}{
				corpus[1],
				corpus[3],
			},
		},
		{
			name:  "NameNotEqual",
			input: "name!=bar",
			expected: []interface{}{
				corpus[0],
				corpus[2],
				corpus[4],
				corpus[5],
				corpus[6],
				corpus[7],
			},
		},
		{
			name:  "NameAndLabelPresent",
			input: "name==bar,labels.bar",
			expected: []interface{}{
				corpus[3],
			},
		},
		{
			name:  "QuotedValue",
			input: "other==\"too complex, yo\"",
			expected: []interface{}{
				corpus[6],
			},
		},
		{
			name:  "RegexpValue",
			input: "other~=[abc]+,name!=foo",
			expected: []interface{}{
				corpus[6],
				corpus[7],
			},
		},
		{
			name:  "NameAndLabelValue",
			input: "name==bar,labels.bar==true",
			expected: []interface{}{
				corpus[3],
			},
		},
		{
			// expected left nil: no entry should match.
			name:  "NameAndLabelValueNoMatch",
			input: "name==bar,labels.bar==wrong",
		},
		{
			name:  "LabelQuotedFieldPathPresent",
			input: `name==foo,labels."more complex label"`,
			expected: []interface{}{
				corpus[2],
			},
		},
		{
			name:  "LabelQuotedFieldPathPresentWithQuoted",
			input: `labels."more complex label with \\ and \""==present`,
			expected: []interface{}{
				corpus[4],
			},
		},
		{
			name:  "LabelQuotedFieldPathPresentWithQuotedEmbed",
			input: `labels."more complex label with \\ and \"".post==present`,
			expected: []interface{}{
				corpus[5],
			},
		},
		{
			name:      "LabelQuotedFieldPathPresentWithQuotedEmbedInvalid",
			input:     `labels.?"more complex label with \\ and \"".post==present`,
			errString: `filters: parse error: [labels. >|?|< "more complex label with \\ and \"".post==present]: expected field or quoted`,
		},
		{
			name:      "TrailingComma",
			input:     "name==foo,",
			errString: `filters: parse error: [name==foo,]: expected field or quoted`,
		},
		{
			name:      "TrailingFieldSeparator",
			input:     "labels.",
			errString: `filters: parse error: [labels.]: expected field or quoted`,
		},
		{
			name:      "MissingValue",
			input:     "image~=,id?=?fbaq",
			errString: `filters: parse error: [image~= >|,|< id?=?fbaq]: expected value or quoted`,
		},
	} {
		t.Run(testcase.name, func(t *testing.T) {
			t.Logf("testcase: %q", testcase.input)
			filter, err := Parse(testcase.input)
			if testcase.errString != "" {
				// Error cases assert on the exact rendered message, including
				// the >|…|< position marker.
				if err == nil {
					t.Fatalf("expected an error, but received nil")
				}
				if err.Error() != testcase.errString {
					t.Fatalf("error %v != %v", err, testcase.errString)
				}

				return
			} else {
				if err != nil {
					t.Fatal(err)
				}
			}

			if filter == nil {
				t.Fatal("filter should not be nil")
			}

			t.Log("filter", filter)
			var results []interface{}
			for _, item := range corpus {
				adaptor := adapt(item)
				if filter.Match(adaptor) {
					results = append(results, item)
				}
			}

			if !reflect.DeepEqual(results, testcase.expected) {
				t.Fatalf("%q: %#v != %#v", testcase.input, results, testcase.expected)
			}
		})
	}
}
|
239
filters/parser.go
Normal file
239
filters/parser.go
Normal file
@ -0,0 +1,239 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
/*
|
||||
Parse the strings into a filter that may be used with an adaptor.
|
||||
|
||||
The filter is made up of zero or more selectors.
|
||||
|
||||
The format is a comma separated list of expressions, in the form of
|
||||
`<fieldpath><op><value>`, known as selectors. All selectors must match the
|
||||
target object for the filter to be true.
|
||||
|
||||
We define the operators "==" for equality, "!=" for not equal and "~=" for a
|
||||
regular expression. If the operator and value are not present, the matcher will
|
||||
test for the presence of a value, as defined by the target object.
|
||||
|
||||
The formal grammar is as follows:
|
||||
|
||||
selectors := selector ("," selector)*
|
||||
selector := fieldpath (operator value)
|
||||
fieldpath := field ('.' field)*
|
||||
field := quoted | [A-Za-z] [A-Za-z0-9_]+
|
||||
operator := "==" | "!=" | "~="
|
||||
value := quoted | [^\s,]+
|
||||
quoted := <go string syntax>
|
||||
|
||||
*/
|
||||
func Parse(s string) (Filter, error) {
|
||||
// special case empty to match all
|
||||
if s == "" {
|
||||
return Always, nil
|
||||
}
|
||||
|
||||
p := parser{input: s}
|
||||
return p.parse()
|
||||
}
|
||||
|
||||
type parser struct {
|
||||
input string
|
||||
scanner scanner
|
||||
}
|
||||
|
||||
func (p *parser) parse() (Filter, error) {
|
||||
p.scanner.init(p.input)
|
||||
|
||||
ss, err := p.selectors()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "filters")
|
||||
}
|
||||
|
||||
return ss, nil
|
||||
}
|
||||
|
||||
func (p *parser) selectors() (Filter, error) {
|
||||
s, err := p.selector()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ss := All{s}
|
||||
|
||||
loop:
|
||||
for {
|
||||
tok := p.scanner.peek()
|
||||
switch tok {
|
||||
case ',':
|
||||
pos, tok, _ := p.scanner.scan()
|
||||
if tok != tokenSelectorSeparator {
|
||||
return nil, p.mkerr(pos, "expected a separator")
|
||||
}
|
||||
|
||||
s, err := p.selector()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ss = append(ss, s)
|
||||
case tokenEOF:
|
||||
break loop
|
||||
default:
|
||||
panic("unconsumed input")
|
||||
}
|
||||
}
|
||||
|
||||
return ss, nil
|
||||
}
|
||||
|
||||
func (p *parser) selector() (selector, error) {
|
||||
fieldpath, err := p.fieldpath()
|
||||
if err != nil {
|
||||
return selector{}, err
|
||||
}
|
||||
|
||||
switch p.scanner.peek() {
|
||||
case tokenSelectorSeparator, tokenEOF:
|
||||
return selector{
|
||||
fieldpath: fieldpath,
|
||||
operator: operatorPresent,
|
||||
}, nil
|
||||
}
|
||||
|
||||
op, err := p.operator()
|
||||
if err != nil {
|
||||
return selector{}, err
|
||||
}
|
||||
|
||||
value, err := p.value()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
return selector{}, io.ErrUnexpectedEOF
|
||||
}
|
||||
return selector{}, err
|
||||
}
|
||||
|
||||
return selector{
|
||||
fieldpath: fieldpath,
|
||||
value: value,
|
||||
operator: op,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// fieldpath parses a dot-separated sequence of field components, returning
// each component unquoted.
func (p *parser) fieldpath() ([]string, error) {
	f, err := p.field()
	if err != nil {
		return nil, err
	}

	fs := []string{f}
loop:
	for {
		tok := p.scanner.peek() // lookahead to consume field separator

		switch tok {
		case '.':
			pos, tok, _ := p.scanner.scan() // consume separator
			if tok != tokenFieldSeparator {
				return nil, p.mkerr(pos, "expected a field separator (`.`)")
			}

			f, err := p.field()
			if err != nil {
				return nil, err
			}

			fs = append(fs, f)
		default:
			// let the layer above handle the other bad cases.
			break loop
		}
	}

	return fs, nil
}
|
||||
|
||||
func (p *parser) field() (string, error) {
|
||||
pos, tok, s := p.scanner.scan()
|
||||
switch tok {
|
||||
case tokenField:
|
||||
return s, nil
|
||||
case tokenQuoted:
|
||||
return p.unquote(pos, s)
|
||||
}
|
||||
|
||||
return "", p.mkerr(pos, "expected field or quoted")
|
||||
}
|
||||
|
||||
func (p *parser) operator() (operator, error) {
|
||||
pos, tok, s := p.scanner.scan()
|
||||
switch tok {
|
||||
case tokenOperator:
|
||||
switch s {
|
||||
case "==":
|
||||
return operatorEqual, nil
|
||||
case "!=":
|
||||
return operatorNotEqual, nil
|
||||
case "~=":
|
||||
return operatorMatches, nil
|
||||
default:
|
||||
return 0, p.mkerr(pos, "unsupported operator %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
return 0, p.mkerr(pos, `expected an operator ("=="|"!="|"~=")`)
|
||||
}
|
||||
|
||||
func (p *parser) value() (string, error) {
|
||||
pos, tok, s := p.scanner.scan()
|
||||
|
||||
switch tok {
|
||||
case tokenValue, tokenField:
|
||||
return s, nil
|
||||
case tokenQuoted:
|
||||
return p.unquote(pos, s)
|
||||
}
|
||||
|
||||
return "", p.mkerr(pos, "expected value or quoted")
|
||||
}
|
||||
|
||||
func (p *parser) unquote(pos int, s string) (string, error) {
|
||||
uq, err := strconv.Unquote(s)
|
||||
if err != nil {
|
||||
return "", p.mkerr(pos, "unquoting failed: %v", err)
|
||||
}
|
||||
|
||||
return uq, nil
|
||||
}
|
||||
|
||||
// parseError carries the original input, the offending byte position and a
// message, rendering the input with a >|…|< marker around the bad spot.
type parseError struct {
	input string
	pos   int
	msg   string
}

// Error renders the input with the character at pos highlighted, or the
// whole input when pos points past the end.
func (pe parseError) Error() string {
	if pe.pos >= len(pe.input) {
		return fmt.Sprintf("[%s]: %v", pe.input, pe.msg)
	}

	before := pe.input[:pe.pos]
	location := pe.input[pe.pos : pe.pos+1] // need to handle end
	after := pe.input[pe.pos+1:]

	return fmt.Sprintf("[%s >|%s|< %s]: %v", before, location, after, pe.msg)
}
|
||||
|
||||
// mkerr builds a positioned parseError against the full input, wrapped with
// a "parse error" prefix.
func (p *parser) mkerr(pos int, format string, args ...interface{}) error {
	return errors.Wrap(parseError{
		input: p.input,
		pos:   pos,
		msg:   fmt.Sprintf(format, args...),
	}, "parse error")
}
|
279
filters/scanner.go
Normal file
279
filters/scanner.go
Normal file
@ -0,0 +1,279 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Scanner tokens. These are negative so they can never collide with valid
// input runes, which scan returns directly as their own token values.
const (
	tokenEOF = -(iota + 1)
	tokenQuoted
	tokenValue
	tokenField
	tokenFieldSeparator
	tokenOperator
	tokenSelectorSeparator
	tokenIllegal
)

// token is either one of the negative constants above or a raw input rune.
type token rune

// String returns a human-readable name for scanner tokens, or the rune
// itself for raw runes.
func (t token) String() string {
	switch t {
	case tokenEOF:
		return "EOF"
	case tokenQuoted:
		return "Quoted"
	case tokenValue:
		return "Value"
	case tokenField:
		return "Field"
	case tokenOperator:
		return "Operator"
	case tokenFieldSeparator:
		return "FieldSeparator"
	case tokenSelectorSeparator:
		return "SelectorSeparator"
	case tokenIllegal:
		return "Illegal"
	}

	return string(t)
}

// GoString implements fmt.GoStringer for %#v formatting in tests.
func (t token) GoString() string {
	return "token" + t.String()
}
|
||||
|
||||
// scanner tokenizes a filter expression. pos/ppos are byte offsets into
// input bounding the current rune.
type scanner struct {
	input string
	pos   int
	ppos  int // bounds the current rune in the string
	value bool
}

// init resets the scanner to the start of input.
func (s *scanner) init(input string) {
	s.input = input
	s.pos = 0
	s.ppos = 0
}

// next advances one rune and returns it, or tokenEOF at end of input and
// tokenIllegal for invalid UTF-8 or a NUL byte.
func (s *scanner) next() rune {
	if s.pos >= len(s.input) {
		return tokenEOF
	}
	s.pos = s.ppos

	r, w := utf8.DecodeRuneInString(s.input[s.ppos:])
	s.ppos += w
	if r == utf8.RuneError {
		if w > 0 {
			return tokenIllegal
		} else {
			return tokenEOF
		}
	}

	if r == 0 {
		// NUL is never legal input.
		return tokenIllegal
	}

	return r
}

// peek returns the next rune without consuming it by saving and restoring
// the scanner position.
func (s *scanner) peek() rune {
	pos := s.pos
	ppos := s.ppos
	ch := s.next()
	s.pos = pos
	s.ppos = ppos
	return ch
}

// scan returns the position, token and raw text of the next token,
// skipping whitespace between tokens. After an operator, the value flag
// switches classification so the right-hand side scans as a value rather
// than a field.
func (s *scanner) scan() (int, token, string) {
	var (
		ch  = s.next()
		pos = s.pos
	)

chomp:
	switch {
	case ch == tokenEOF:
	case ch == tokenIllegal:
	case isQuoteRune(ch):
		s.scanString(ch)
		return pos, tokenQuoted, s.input[pos:s.ppos]
	case ch == ',':
		return pos, tokenSelectorSeparator, s.input[pos:s.ppos]
	case ch == '.':
		return pos, tokenFieldSeparator, s.input[pos:s.ppos]
	case isOperatorRune(ch):
		s.scanOperator()
		s.value = true
		return pos, tokenOperator, s.input[pos:s.ppos]
	case unicode.IsSpace(ch):
		// chomp whitespace and re-classify from the next rune
		ch = s.next()
		pos = s.pos
		goto chomp
	case s.value:
		s.scanValue()

		// TODO(stevvooe): We can get rid of the value flag by having a
		// scanUnquoted that accumulates characters. If it is a legal field,
		// then we return a field token. The parser can then treat fields as
		// values. This will allow the default case here to just scan value or
		// field.
		s.value = false
		return pos, tokenValue, s.input[pos:s.ppos]
	case isFieldRune(ch):
		s.scanField()
		return pos, tokenField, s.input[pos:s.ppos]
	}

	// EOF/illegal fall through here with empty text.
	return s.pos, token(ch), ""
}
|
||||
|
||||
func (s *scanner) scanField() {
|
||||
for {
|
||||
ch := s.peek()
|
||||
if !isFieldRune(ch) {
|
||||
break
|
||||
}
|
||||
s.next()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) scanOperator() {
|
||||
for {
|
||||
ch := s.peek()
|
||||
switch ch {
|
||||
case '=', '!', '~':
|
||||
s.next()
|
||||
default:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) scanValue() {
|
||||
for {
|
||||
ch := s.peek()
|
||||
if !isValueRune(ch) {
|
||||
break
|
||||
}
|
||||
s.next()
|
||||
}
|
||||
}
|
||||
|
||||
// scanString consumes a quoted literal, including escape sequences, up to
// and including the closing quote rune.
func (s *scanner) scanString(quote rune) {
	ch := s.next() // read character after quote
	for ch != quote {
		if ch == '\n' || ch < 0 {
			s.error("literal not terminated")
			return
		}
		if ch == '\\' {
			ch = s.scanEscape(quote)
		} else {
			ch = s.next()
		}
	}
	return
}

// scanEscape consumes one Go-style escape sequence following a backslash
// and returns the first rune after it.
func (s *scanner) scanEscape(quote rune) rune {
	ch := s.next() // read character after '/'
	switch ch {
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
		// nothing to do
		ch = s.next()
	case '0', '1', '2', '3', '4', '5', '6', '7':
		ch = s.scanDigits(ch, 8, 3)
	case 'x':
		ch = s.scanDigits(s.next(), 16, 2)
	case 'u':
		ch = s.scanDigits(s.next(), 16, 4)
	case 'U':
		ch = s.scanDigits(s.next(), 16, 8)
	default:
		s.error("illegal char escape")
	}
	return ch
}

// scanDigits consumes up to n digits in the given base, returning the first
// rune past the run; reports an error when fewer than n digits are present.
func (s *scanner) scanDigits(ch rune, base, n int) rune {
	for n > 0 && digitVal(ch) < base {
		ch = s.next()
		n--
	}
	if n > 0 {
		s.error("illegal char escape")
	}
	return ch
}

// error reports a scan error.
// TODO: currently this only prints ("error fixme") to stdout; scan errors
// should be surfaced to the parser/caller instead.
func (s *scanner) error(msg string) {
	fmt.Println("error fixme", msg)
}
|
||||
|
||||
// digitVal returns the numeric value of ch as a digit in bases up to 16,
// or 16 (larger than any legal digit value) when ch is not a hex digit.
func digitVal(ch rune) int {
	switch {
	case ch >= '0' && ch <= '9':
		return int(ch - '0')
	case ch >= 'a' && ch <= 'f':
		return int(ch-'a') + 10
	case ch >= 'A' && ch <= 'F':
		return int(ch-'A') + 10
	default:
		return 16 // larger than any legal digit val
	}
}

// isFieldRune reports whether r may appear in an unquoted field name.
func isFieldRune(r rune) bool {
	return r == '_' || isAlphaRune(r) || isDigitRune(r)
}

// isAlphaRune reports whether r is an ASCII letter.
func isAlphaRune(r rune) bool {
	return ('A' <= r && r <= 'Z') || ('a' <= r && r <= 'z')
}

// isDigitRune reports whether r is an ASCII digit.
func isDigitRune(r rune) bool {
	return '0' <= r && r <= '9'
}

// isOperatorRune reports whether r can start or continue an operator.
func isOperatorRune(r rune) bool {
	return r == '=' || r == '!' || r == '~'
}

// isQuoteRune reports whether r opens a quoted literal.
func isQuoteRune(r rune) bool {
	return r == '"' // maybe add single quoting?
}

// isSeparatorRune reports whether r separates selectors or field components.
func isSeparatorRune(r rune) bool {
	return r == ',' || r == '.'
}

// isValueRune reports whether r may appear in an unquoted value: any
// printable rune except the selector separator and whitespace.
func isValueRune(r rune) bool {
	if r == ',' || unicode.IsSpace(r) {
		return false
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsNumber(r) ||
		unicode.IsGraphic(r) || unicode.IsPunct(r)
}
|
236
filters/scanner_test.go
Normal file
236
filters/scanner_test.go
Normal file
@ -0,0 +1,236 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// tokenResult captures one expected scan result (position, token kind and
// raw text) for the table tests below.
type tokenResult struct {
	pos   int
	token token
	text  string
}

// String renders the result in the same shape as the test-table literals,
// making failure messages easy to paste back into a testcase.
func (tr tokenResult) String() string {
	return fmt.Sprintf("{pos: %v, token: %v, text: %q}", tr.pos, tr.token, tr.text)
}
|
||||
|
||||
// TestScanner feeds each input through the scanner and compares the full
// token stream (position, token kind, raw text) against expectations.
func TestScanner(t *testing.T) {

	for _, testcase := range []struct {
		name     string
		input    string
		expected []tokenResult
	}{
		{
			name:  "Field",
			input: "name",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenEOF},
			},
		},
		{
			name:  "SelectorsWithOperators",
			input: "name==value,foo!=bar",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "=="},
				{pos: 6, token: tokenValue, text: "value"},
				{pos: 11, token: tokenSelectorSeparator, text: ","},
				{pos: 12, token: tokenField, text: "foo"},
				{pos: 15, token: tokenOperator, text: "!="},
				{pos: 17, token: tokenValue, text: "bar"},
				{pos: 20, token: tokenEOF},
			},
		},
		{
			name:  "SelectorsWithFieldPaths",
			input: "name==value,labels.foo=value,other.bar~=match",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "=="},
				{pos: 6, token: tokenValue, text: "value"},
				{pos: 11, token: tokenSelectorSeparator, text: ","},
				{pos: 12, token: tokenField, text: "labels"},
				{pos: 18, token: tokenFieldSeparator, text: "."},
				{pos: 19, token: tokenField, text: "foo"},
				{pos: 22, token: tokenOperator, text: "="},
				{pos: 23, token: tokenValue, text: "value"},
				{pos: 28, token: tokenSelectorSeparator, text: ","},
				{pos: 29, token: tokenField, text: "other"},
				{pos: 34, token: tokenFieldSeparator, text: "."},
				{pos: 35, token: tokenField, text: "bar"},
				{pos: 38, token: tokenOperator, text: "~="},
				{pos: 40, token: tokenValue, text: "match"},
				{pos: 45, token: tokenEOF},
			},
		},
		{
			name:  "RegexpValue",
			input: "name~=[abc]+,foo=test",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenValue, text: "[abc]+"},
				{pos: 12, token: tokenSelectorSeparator, text: ","},
				{pos: 13, token: tokenField, text: "foo"},
				{pos: 16, token: tokenOperator, text: "="},
				{pos: 17, token: tokenValue, text: "test"},
				{pos: 21, token: tokenEOF},
			},
		},
		{
			name:  "RegexpEscapedValue",
			input: `name~=[abc]\+,foo=test`,
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenValue, text: "[abc]\\+"},
				{pos: 13, token: tokenSelectorSeparator, text: ","},
				{pos: 14, token: tokenField, text: "foo"},
				{pos: 17, token: tokenOperator, text: "="},
				{pos: 18, token: tokenValue, text: "test"},
				{pos: 22, token: tokenEOF},
			},
		},
		{
			// multibyte rune: positions are byte offsets, not rune counts.
			name:  "Cowsay",
			input: "name~=牛,labels.moo=true",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenValue, text: "牛"},
				{pos: 9, token: tokenSelectorSeparator, text: ","},
				{pos: 10, token: tokenField, text: "labels"},
				{pos: 16, token: tokenFieldSeparator, text: "."},
				{pos: 17, token: tokenField, text: "moo"},
				{pos: 20, token: tokenOperator, text: "="},
				{pos: 21, token: tokenValue, text: "true"},
				{pos: 25, token: tokenEOF},
			},
		},
		{
			// quoted text is returned raw, including the surrounding quotes.
			name:  "Escapes",
			input: `name~="asdf\n\tfooo"`,
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenQuoted, text: "\"asdf\\n\\tfooo\""},
				{pos: 20, token: tokenEOF},
			},
		},
		{
			name:  "NullInput",
			input: "foo\x00bar",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "foo"},
				{pos: 3, token: tokenIllegal},
				{pos: 4, token: tokenField, text: "bar"},
				{pos: 7, token: tokenEOF},
			},
		},
		{
			name:  "SpacesChomped",
			input: "foo = bar ",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "foo"},
				{pos: 4, token: tokenOperator, text: "="},
				{pos: 6, token: tokenValue, text: "bar"},
				{pos: 13, token: tokenEOF},
			},
		},
		{
			name:  "PartialInput",
			input: "interrupted=",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "interrupted"},
				{pos: 11, token: tokenOperator, text: "="},
				{pos: 12, token: tokenEOF},
			},
		},
		{
			name:  "DoubleValue",
			input: "doublevalue=value value",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "doublevalue"},
				{pos: 11, token: tokenOperator, text: "="},
				{pos: 12, token: tokenValue, text: "value"},
				{pos: 18, token: tokenField, text: "value"},
				{pos: 23, token: tokenEOF},
			},
		},
		{
			name:  "LeadingWithQuoted",
			input: `"leading quote".postquote==value`,
			expected: []tokenResult{
				{pos: 0, token: tokenQuoted, text: "\"leading quote\""},
				{pos: 15, token: tokenFieldSeparator, text: "."},
				{pos: 16, token: tokenField, text: "postquote"},
				{pos: 25, token: tokenOperator, text: "=="},
				{pos: 27, token: tokenValue, text: "value"},
				{pos: 32, token: tokenEOF},
			},
		},
		{
			name:  "MissingValue",
			input: "input==,id?=ff",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "input"},
				{pos: 5, token: tokenOperator, text: "=="},
				{pos: 7, token: tokenSelectorSeparator, text: ","},
				{pos: 8, token: tokenValue, text: "id?=ff"},
				{pos: 14, token: tokenEOF},
			},
		},
	} {
		t.Run(testcase.name, func(t *testing.T) {
			var sc scanner
			sc.init(testcase.input)
			t.Logf("scan %q", testcase.input)

			// If you leave the expected empty, the test case will just print
			// out the token stream, which you can paste into the testcase when
			// adding new cases.
			if len(testcase.expected) == 0 {
				fmt.Println("Name", testcase.name)
			}

			for i := 0; ; i++ {
				pos, tok, s := sc.scan()
				t.Log("token", pos, tok, strconv.Quote(s))
				if len(testcase.expected) == 0 {
					if len(s) > 0 {
						fmt.Printf("{pos: %v, token: %#v, text: %q},\n", pos, tok, s)
					} else {
						fmt.Printf("{pos: %v, token: %#v},\n", pos, tok)
					}
				} else {
					tokv := tokenResult{pos: pos, token: tok, text: s}
					if i >= len(testcase.expected) {
						t.Fatalf("too many tokens parsed")
					}

					if tokv != testcase.expected[i] {
						t.Fatalf("token unexpected: %v != %v", tokv, testcase.expected[i])
					}
				}

				if tok == tokenEOF {
					break
				}
			}

			// make sure we've eof'd
			_, tok, _ := sc.scan()
			if tok != tokenEOF {
				t.Fatal("must consume all input")
			}

			if len(testcase.expected) == 0 {
				t.Fatal("must define expected tokens")
			}
		})
	}
}
|
Loading…
Reference in New Issue
Block a user