Move filters to pkg/filters

Signed-off-by: Derek McGowan <derek@mcg.dev>
This commit is contained in:
Derek McGowan
2024-01-17 09:54:54 -08:00
parent 44a836c9b5
commit c38f2ab724
23 changed files with 16 additions and 16 deletions

View File

@@ -22,11 +22,11 @@ import (
"strings"
"time"
"github.com/containerd/containerd/v2/filters"
"github.com/containerd/containerd/v2/identifiers"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/pkg/errdefs"
"github.com/containerd/containerd/v2/pkg/events"
"github.com/containerd/containerd/v2/pkg/filters"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
goevents "github.com/docker/go-events"

33
pkg/filters/adaptor.go Normal file
View File

@@ -0,0 +1,33 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filters
// Adaptor specifies the mapping of fieldpaths to a type. For the given field
// path, the value and whether it is present should be returned. The mapping of
// the fieldpath to a field is deferred to the adaptor implementation, but
// should generally follow protobuf field path/mask semantics.
type Adaptor interface {
// Field resolves fieldpath (one element per path component) against the
// adapted object, returning the field's value and whether it is present.
Field(fieldpath []string) (value string, present bool)
}
// AdapterFunc is a function type that satisfies Adaptor, so a plain function
// can provide implementation specific resolution of fieldpaths.
type AdapterFunc func(fieldpath []string) (string, bool)

// Field calls the underlying function with fieldpath and returns the value it
// resolved together with whether the field is present.
func (fn AdapterFunc) Field(fieldpath []string) (string, bool) {
	value, present := fn(fieldpath)
	return value, present
}

178
pkg/filters/filter.go Normal file
View File

@@ -0,0 +1,178 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package filters defines a syntax and parser that can be used for the
// filtration of items across the containerd API. The core is built on the
// concept of protobuf field paths, with quoting. Several operators allow the
// user to flexibly select items based on field presence, equality, inequality
// and regular expressions. Flexible adaptors support working with any type.
//
// The syntax is fairly familiar, if you've used container ecosystem
// projects. At the core, we base it on the concept of protobuf field
// paths, augmenting with the ability to quote portions of the field path
// to match arbitrary labels. These "selectors" come in the following
// syntax:
//
// ```
// <fieldpath>[<operator><value>]
// ```
//
// A basic example is as follows:
//
// ```
// name==foo
// ```
//
// This would match all objects that have a field `name` with the value
// `foo`. If we only want to test if the field is present, we can omit the
// operator. This is most useful for matching labels in containerd. The
// following will match objects that have the field "labels" and have the
// label "foo" defined:
//
// ```
// labels.foo
// ```
//
// We also allow for quoting of parts of the field path to allow matching
// of arbitrary items:
//
// ```
// labels."very complex label"==something
// ```
//
// We also define `!=` and `~=` as operators. The `!=` will match all
// objects that don't match the value for a field and `~=` will compile the
// target value as a regular expression and match the field value against that.
//
// Selectors can be combined using a comma, such that the resulting
// selector will require all selectors are matched for the object to match.
// The following example will match objects that are named `foo` and have
// the label `bar`:
//
// ```
// name==foo,labels.bar
// ```
package filters
import (
"regexp"
"github.com/containerd/log"
)
// Filter matches specific resources based on the provided filter.
type Filter interface {
// Match reports whether the object exposed through adaptor satisfies
// the filter.
Match(adaptor Adaptor) bool
}
// FilterFunc adapts an ordinary function to the Filter interface.
type FilterFunc func(Adaptor) bool

// Match invokes fn with the adaptor and reports whether it matched.
func (fn FilterFunc) Match(adaptor Adaptor) bool {
	matched := fn(adaptor)
	return matched
}
// Always is a filter that matches every object, regardless of its type.
var Always = FilterFunc(func(Adaptor) bool { return true })
// Any combines filters so that an object matches when at least one of the
// contained filters matches.
type Any []Filter

// Match reports whether any contained filter matches the object. An empty
// Any matches nothing.
func (m Any) Match(adaptor Adaptor) bool {
	for i := range m {
		if m[i].Match(adaptor) {
			return true
		}
	}
	return false
}
// All combines filters so that an object matches only when every contained
// filter matches.
type All []Filter

// Match reports whether all contained filters match the object. An empty All
// matches everything.
func (m All) Match(adaptor Adaptor) bool {
	for i := range m {
		if !m[i].Match(adaptor) {
			return false
		}
	}
	return true
}
// operator identifies the comparison a selector applies to a field.
type operator int

// The constants are declared with the operator type (rather than as untyped
// integers) so they carry the String method and print symbolically.
const (
	operatorPresent  operator = iota // field is present; no value comparison
	operatorEqual                    // "=="
	operatorNotEqual                 // "!="
	operatorMatches                  // "~=" (regular expression)
)

// String returns the textual form of the operator as written in a filter
// expression ("?" stands for the implicit presence test), or "unknown" for a
// value outside the declared set.
func (op operator) String() string {
	switch op {
	case operatorPresent:
		return "?"
	case operatorEqual:
		return "=="
	case operatorNotEqual:
		return "!="
	case operatorMatches:
		return "~="
	}

	return "unknown"
}
// selector is a single parsed expression of a filter: a field path, the
// operator to apply and, for comparison operators, the value to compare with.
type selector struct {
	fieldpath []string
	operator  operator
	value     string
	re        *regexp.Regexp // optional precompiled form of value, used by operatorMatches
}

// Match resolves the selector's field path through adaptor and applies the
// operator to the result:
//
//   - operatorPresent: true when the field is present.
//   - operatorEqual: true when the field is present and equals value.
//   - operatorNotEqual: true when the returned value differs from value;
//     presence is not checked.
//   - operatorMatches: true when the returned value matches value interpreted
//     as a regular expression. An expression that fails to compile is logged
//     and matches nothing.
func (m selector) Match(adaptor Adaptor) bool {
	value, present := adaptor.Field(m.fieldpath)

	switch m.operator {
	case operatorPresent:
		return present
	case operatorEqual:
		return present && value == m.value
	case operatorNotEqual:
		return value != m.value
	case operatorMatches:
		re := m.re
		if re == nil {
			// m is a copy (value receiver), so storing the compiled
			// expression on it would never be visible to later calls; keep
			// it in a local instead of pretending to cache it.
			r, err := regexp.Compile(m.value)
			if err != nil {
				log.L.WithError(err).Errorf("error compiling regexp %q", m.value)
				return false
			}
			re = r
		}

		return re.MatchString(value)
	default:
		return false
	}
}

346
pkg/filters/filter_test.go Normal file
View File

@@ -0,0 +1,346 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filters
import (
"reflect"
"strings"
"testing"
)
// TestFilters parses a table of filter expressions and checks, for each one,
// either the exact parse error or the subset of a fixed corpus that the
// resulting filter matches. Every subtest has a unique name so that failures
// and `go test -run` selections are unambiguous.
func TestFilters(t *testing.T) {
	type cEntry struct {
		Name   string
		Other  string
		Labels map[string]string
	}

	corpusS := []cEntry{
		{
			Name: "foo",
			Labels: map[string]string{
				"foo": "true",
			},
		},
		{
			Name: "bar",
		},
		{
			Name: "foo",
			Labels: map[string]string{
				"foo":                "present",
				"more complex label": "present",
			},
		},
		{
			Name: "bar",
			Labels: map[string]string{
				"bar": "true",
			},
		},
		{
			Name: "fooer",
			Labels: map[string]string{
				"more complex label with \\ and \"": "present",
			},
		},
		{
			Name: "fooer",
			Labels: map[string]string{
				"more complex label with \\ and \".post": "present",
			},
		},
		{
			Name:  "baz",
			Other: "too complex, yo",
		},
		{
			Name:  "bazo",
			Other: "abc",
		},
		{
			Name: "compound",
			Labels: map[string]string{
				"foo": "omg_asdf.asdf-qwer",
			},
		},
	}

	var corpus []interface{}
	for _, entry := range corpusS {
		corpus = append(corpus, entry)
	}

	// adapt shows an example of how to build an adaptor function for a type.
	adapt := func(o interface{}) Adaptor {
		obj := o.(cEntry)
		return AdapterFunc(func(fieldpath []string) (string, bool) {
			switch fieldpath[0] {
			case "name":
				return obj.Name, len(obj.Name) > 0
			case "other":
				return obj.Other, len(obj.Other) > 0
			case "labels":
				// label keys may themselves contain dots, so rejoin the
				// remaining path components to form the key.
				value, ok := obj.Labels[strings.Join(fieldpath[1:], ".")]
				return value, ok
			}

			return "", false
		})
	}

	for _, testcase := range []struct {
		name      string
		input     string
		expected  []interface{}
		errString string
	}{
		{
			name:     "Empty",
			input:    "",
			expected: corpus,
		},
		{
			name:     "Present",
			input:    "name",
			expected: corpus,
		},
		{
			name:  "LabelPresent",
			input: "labels.foo",
			expected: []interface{}{
				corpus[0],
				corpus[2],
				corpus[8],
			},
		},
		{
			name:  "NameAndLabelPresent",
			input: "labels.foo,name",
			expected: []interface{}{
				corpus[0],
				corpus[2],
				corpus[8],
			},
		},
		{
			name:  "LabelValue",
			input: "labels.foo==true",
			expected: []interface{}{
				corpus[0],
			},
		},
		{
			name:  "LabelValuePunctuated",
			input: "labels.foo==omg_asdf.asdf-qwer",
			expected: []interface{}{
				corpus[8],
			},
		},
		{
			name:      "LabelValueNoAltQuoting",
			input:     "labels.|foo|==omg_asdf.asdf-qwer",
			errString: "filters: parse error: [labels. >|||< foo|==omg_asdf.asdf-qwer]: invalid quote encountered",
		},
		{
			name:  "Name",
			input: "name==bar",
			expected: []interface{}{
				corpus[1],
				corpus[3],
			},
		},
		{
			name:  "NameNotEqual",
			input: "name!=bar",
			expected: []interface{}{
				corpus[0],
				corpus[2],
				corpus[4],
				corpus[5],
				corpus[6],
				corpus[7],
				corpus[8],
			},
		},
		{
			name:  "NameValueAndLabelPresent",
			input: "name==bar,labels.bar",
			expected: []interface{}{
				corpus[3],
			},
		},
		{
			name:  "QuotedValue",
			input: "other==\"too complex, yo\"",
			expected: []interface{}{
				corpus[6],
			},
		},
		{
			name:  "RegexpValue",
			input: "other~=[abc]+,name!=foo",
			expected: []interface{}{
				corpus[6],
				corpus[7],
			},
		},
		{
			name:  "RegexpQuotedValue",
			input: "other~=/[abc]+/,name!=foo",
			expected: []interface{}{
				corpus[6],
				corpus[7],
			},
		},
		{
			name:  "RegexpQuotedValueRepetition",
			input: "other~=/[abc]{1,2}/,name!=foo",
			expected: []interface{}{
				corpus[6],
				corpus[7],
			},
		},
		{
			name:  "RegexpQuotedValueGarbage",
			input: "other~=/[abc]{0,1}\"\\//,name!=foo",
			// valid syntax, but doesn't match anything
		},
		{
			name:  "NameAndLabelValue",
			input: "name==bar,labels.bar==true",
			expected: []interface{}{
				corpus[3],
			},
		},
		{
			name:  "NameAndLabelValueNoMatch",
			input: "name==bar,labels.bar==wrong",
		},
		{
			name:  "LabelQuotedFieldPathPresent",
			input: `name==foo,labels."more complex label"`,
			expected: []interface{}{
				corpus[2],
			},
		},
		{
			name:  "LabelQuotedFieldPathPresentWithQuoted",
			input: `labels."more complex label with \\ and \""==present`,
			expected: []interface{}{
				corpus[4],
			},
		},
		{
			name:  "LabelQuotedFieldPathPresentWithQuotedEmbed",
			input: `labels."more complex label with \\ and \"".post==present`,
			expected: []interface{}{
				corpus[5],
			},
		},
		{
			name:      "LabelQuotedFieldPathPresentWithQuotedEmbedInvalid",
			input:     `labels.?"more complex label with \\ and \"".post==present`,
			errString: `filters: parse error: [labels. >|?|< "more complex label with \\ and \"".post==present]: expected field or quoted`,
		},
		{
			name:      "TrailingComma",
			input:     "name==foo,",
			errString: `filters: parse error: [name==foo,]: expected field or quoted`,
		},
		{
			name:      "TrailingFieldSeparator",
			input:     "labels.",
			errString: `filters: parse error: [labels.]: expected field or quoted`,
		},
		{
			name:      "MissingValue",
			input:     "image~=,id?=?fbaq",
			errString: `filters: parse error: [image~= >|,|< id?=?fbaq]: expected value or quoted`,
		},
		{
			name:      "FieldQuotedLiteralNotTerminated",
			input:     "labels.ns/key==value",
			errString: `filters: parse error: [labels.ns >|/|< key==value]: quoted literal not terminated`,
		},
		{
			name:      "ValueQuotedLiteralNotTerminated",
			input:     "labels.key==/value",
			errString: `filters: parse error: [labels.key== >|/|< value]: quoted literal not terminated`,
		},
	} {
		t.Run(testcase.name, func(t *testing.T) {
			filter, err := Parse(testcase.input)
			if testcase.errString != "" {
				if err == nil {
					t.Fatalf("expected an error, but received nil")
				}
				if err.Error() != testcase.errString {
					t.Fatalf("error %v != %v", err, testcase.errString)
				}

				return
			}
			if err != nil {
				t.Fatal(err)
			}

			if filter == nil {
				t.Fatal("filter should not be nil")
			}

			var results []interface{}
			for _, item := range corpus {
				adaptor := adapt(item)
				if filter.Match(adaptor) {
					results = append(results, item)
				}
			}

			if !reflect.DeepEqual(results, testcase.expected) {
				t.Fatalf("%q: %#v != %#v", testcase.input, results, testcase.expected)
			}
		})
	}
}
// TestOperatorStrings verifies the textual form of every operator, including
// the fallback used for values outside the declared set.
func TestOperatorStrings(t *testing.T) {
	type opCase struct {
		op       operator
		expected string
	}

	cases := []opCase{
		{operatorPresent, "?"},
		{operatorEqual, "=="},
		{operatorNotEqual, "!="},
		{operatorMatches, "~="},
		{10, "unknown"},
	}

	for i := range cases {
		got, want := cases[i].op.String(), cases[i].expected
		if got != want {
			t.Fatalf("return value unexpected: %v != %v", got, want)
		}
	}
}
// FuzzFiltersParse feeds arbitrary expressions to Parse and checks its result
// contract: a successful parse yields a filter and no error, a failed parse
// yields an error and no filter.
func FuzzFiltersParse(f *testing.F) {
	f.Add("foo=bar")
	f.Fuzz(func(t *testing.T, expr string) {
		filter, err := Parse(expr)
		// Exactly one of the two results may be set; both set and both
		// unset are each a contract violation.
		if (filter == nil) == (err == nil) {
			t.Fatalf("Parse(%q): exactly one of filter and err must be non-nil, got filter=%v err=%v", expr, filter, err)
		}
	})
}

290
pkg/filters/parser.go Normal file
View File

@@ -0,0 +1,290 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filters
import (
"fmt"
"io"
"github.com/containerd/containerd/v2/pkg/errdefs"
)
// Parse turns the expression s into a filter that may be used with an adaptor.
//
// A filter consists of zero or more selectors, written as a comma separated
// list of expressions of the form `<fieldpath><op><value>`. Every selector
// must match the target object for the filter to match.
//
// The operators are "==" for equality, "!=" for inequality and "~=" for a
// regular expression match. When the operator and value are omitted, the
// selector tests for the presence of the field, as defined by the target
// object.
//
// The formal grammar is as follows:
//
//	selectors := selector ("," selector)*
//	selector  := fieldpath (operator value)?
//	fieldpath := field ('.' field)*
//	field     := quoted | [A-Za-z0-9_]+
//	operator  := "==" | "!=" | "~="
//	value     := quoted | [^\s,]+
//	quoted    := <go string syntax>
//
// The empty string is a special case and yields a filter matching everything.
func Parse(s string) (Filter, error) {
	if len(s) == 0 {
		// special case empty to match all
		return Always, nil
	}

	p := &parser{input: s}
	return p.parse()
}
// ParseAll parses every expression in ss and combines the results into one
// filter that matches when any of the individual filters matches.
//
// With no expressions the returned filter matches anything. A parse failure
// is reported as an error wrapping errdefs.ErrInvalidArgument.
func ParseAll(ss ...string) (Filter, error) {
	if len(ss) == 0 {
		return Always, nil
	}

	filters := make(Any, 0, len(ss))
	for i := range ss {
		parsed, err := Parse(ss[i])
		if err != nil {
			return nil, fmt.Errorf("%s: %w", err.Error(), errdefs.ErrInvalidArgument)
		}
		filters = append(filters, parsed)
	}

	return filters, nil
}
// parser holds the expression being parsed and the scanner tokenizing it.
type parser struct {
	input   string
	scanner scanner
}

// parse tokenizes the input and builds the filter for its selectors. Errors
// are returned prefixed with "filters: ".
func (p *parser) parse() (Filter, error) {
	p.scanner.init(p.input)

	filter, err := p.selectors()
	if err == nil {
		return filter, nil
	}

	return nil, fmt.Errorf("filters: %w", err)
}
// selectors parses a comma separated list of one or more selectors and
// returns them as an All filter. Parsing stops at end of input; anything
// other than a comma after a selector is an error.
func (p *parser) selectors() (Filter, error) {
	first, err := p.selector()
	if err != nil {
		return nil, err
	}

	all := All{first}
	for {
		next := p.scanner.peek()
		if next == tokenEOF {
			return all, nil
		}
		if next != ',' {
			return nil, p.mkerr(p.scanner.ppos, "unexpected input: %v", string(next))
		}

		// consume the comma that was just peeked
		pos, tok, _ := p.scanner.scan()
		if tok != tokenSeparator {
			return nil, p.mkerr(pos, "expected a separator")
		}

		sel, err := p.selector()
		if err != nil {
			return nil, err
		}
		all = append(all, sel)
	}
}
// selector parses one selector: a field path optionally followed by an
// operator and a value. Without an operator the selector tests for presence.
func (p *parser) selector() (selector, error) {
	path, err := p.fieldpath()
	if err != nil {
		return selector{}, err
	}

	// A field path followed directly by a comma or the end of input is a
	// presence test.
	if next := p.scanner.peek(); next == ',' || next == tokenSeparator || next == tokenEOF {
		return selector{
			fieldpath: path,
			operator:  operatorPresent,
		}, nil
	}

	op, err := p.operator()
	if err != nil {
		return selector{}, err
	}

	// only regular expression values may use the alternative quote characters
	value, err := p.value(op == operatorMatches)
	if err == io.EOF {
		return selector{}, io.ErrUnexpectedEOF
	}
	if err != nil {
		return selector{}, err
	}

	return selector{
		fieldpath: path,
		value:     value,
		operator:  op,
	}, nil
}
// fieldpath parses one or more fields separated by '.' and returns them in
// order. Whatever follows the path is left for the caller to validate.
func (p *parser) fieldpath() ([]string, error) {
	first, err := p.field()
	if err != nil {
		return nil, err
	}

	path := []string{first}
	// keep going for as long as the lookahead is a field separator
	for p.scanner.peek() == '.' {
		pos, tok, _ := p.scanner.scan() // consume separator
		if tok != tokenSeparator {
			return nil, p.mkerr(pos, "expected a field separator (`.`)")
		}

		next, err := p.field()
		if err != nil {
			return nil, err
		}
		path = append(path, next)
	}

	return path, nil
}
// field scans the next token and returns it as a single field path component.
// A plain field token is returned verbatim; a quoted token is unquoted and
// must use a standard quote character. Any other token is a parse error
// positioned at that token.
func (p *parser) field() (string, error) {
	pos, tok, s := p.scanner.scan()
	switch tok {
	case tokenField:
		return s, nil
	case tokenQuoted:
		return p.unquote(pos, s, false)
	case tokenIllegal:
		// The scanner message is data, not a format string: pass it as an
		// argument so a '%' in it can never be interpreted as a verb.
		return "", p.mkerr(pos, "%s", p.scanner.err)
	}

	return "", p.mkerr(pos, "expected field or quoted")
}
// operator scans the next token and maps it to one of the supported
// comparison operators. An operator token with unsupported text, an illegal
// token, or any other token is a parse error positioned at that token.
func (p *parser) operator() (operator, error) {
	pos, tok, s := p.scanner.scan()
	switch tok {
	case tokenOperator:
		switch s {
		case "==":
			return operatorEqual, nil
		case "!=":
			return operatorNotEqual, nil
		case "~=":
			return operatorMatches, nil
		default:
			return 0, p.mkerr(pos, "unsupported operator %q", s)
		}
	case tokenIllegal:
		// The scanner message is data, not a format string: pass it as an
		// argument so a '%' in it can never be interpreted as a verb.
		return 0, p.mkerr(pos, "%s", p.scanner.err)
	}

	return 0, p.mkerr(pos, `expected an operator ("=="|"!="|"~=")`)
}
// value scans the next token and returns it as a selector value. Value and
// field tokens are returned verbatim; a quoted token is unquoted, with the
// alternative quote characters accepted only when allowAltQuotes is set. Any
// other token is a parse error positioned at that token.
func (p *parser) value(allowAltQuotes bool) (string, error) {
	pos, tok, s := p.scanner.scan()

	switch tok {
	case tokenValue, tokenField:
		return s, nil
	case tokenQuoted:
		return p.unquote(pos, s, allowAltQuotes)
	case tokenIllegal:
		// The scanner message is data, not a format string: pass it as an
		// argument so a '%' in it can never be interpreted as a verb.
		return "", p.mkerr(pos, "%s", p.scanner.err)
	}

	return "", p.mkerr(pos, "expected value or quoted")
}
// unquote strips the quoting from the quoted token s found at pos. Unless
// allowAlts is set, only single and double quotes are accepted as the opening
// quote character. Failures are reported as parse errors at pos.
func (p *parser) unquote(pos int, s string, allowAlts bool) (string, error) {
	if !allowAlts {
		switch s[0] {
		case '\'', '"':
			// standard quote characters are always acceptable
		default:
			return "", p.mkerr(pos, "invalid quote encountered")
		}
	}

	unquoted, err := unquote(s)
	if err != nil {
		return "", p.mkerr(pos, "unquoting failed: %v", err)
	}

	return unquoted, nil
}
// parseError describes a failure to parse a filter expression at a byte
// offset of the input.
type parseError struct {
	input string
	pos   int
	msg   string
}

// Error renders the message together with the input. When pos lies inside the
// input, the character starting at pos is highlighted as
// "[before >|c|< after]: msg"; otherwise the whole input is shown as
// "[input]: msg".
func (pe parseError) Error() string {
	if pe.pos < 0 || pe.pos >= len(pe.input) {
		return fmt.Sprintf("[%s]: %v", pe.input, pe.msg)
	}

	// Highlight the whole character at pos, not just its first byte, so a
	// multi-byte UTF-8 sequence is never split across the markers. Ranging
	// over a string yields the byte offset of each character, so the offset
	// of the second character is the width of the first.
	rest := pe.input[pe.pos:]
	width := len(rest)
	for i := range rest {
		if i > 0 {
			width = i
			break
		}
	}

	before, location, after := pe.input[:pe.pos], rest[:width], rest[width:]
	return fmt.Sprintf("[%s >|%s|< %s]: %v", before, location, after, pe.msg)
}
// mkerr builds a parse error for the parser's input at byte offset pos, with
// the message produced from format and args. The result is prefixed with
// "parse error: " and wraps a parseError.
func (p *parser) mkerr(pos int, format string, args ...interface{}) error {
	pe := parseError{
		input: p.input,
		pos:   pos,
		msg:   fmt.Sprintf(format, args...),
	}
	return fmt.Errorf("parse error: %w", pe)
}

252
pkg/filters/quote.go Normal file
View File

@@ -0,0 +1,252 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filters
import (
"errors"
"unicode/utf8"
)
// NOTE(stevvooe): Most of this code in this file is copied from the stdlib
// strconv package and modified to be able to handle quoting with `/` and `|`
// as delimiters. The copyright is held by the Go authors.
// errQuoteSyntax is returned by the unquoting helpers in this file when the
// input is not a well-formed quoted literal or escape sequence.
var errQuoteSyntax = errors.New("quote syntax error")
// unquoteChar decodes the first character or byte in the escaped string
// or character literal represented by the string s.
// It returns four values:
//
//  1. value, the decoded Unicode code point or byte value;
//  2. multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
//  3. tail, the remainder of the string after the character; and
//  4. an error that will be nil if the character is syntactically valid.
//
// The second argument, quote, specifies the type of literal being parsed
// and therefore which escaped quote character is permitted.
// If set to a single quote, it permits the sequence \' and disallows unescaped '.
// If set to a double quote, it permits \" and disallows unescaped ".
// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
//
// An empty s, and \u or \U escapes that do not denote a valid Unicode code
// point (out of range or a surrogate half), are syntax errors.
//
// This is from Go strconv package, modified to support `|` and `/` as double
// quotes for use with regular expressions.
func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
	// Guard the s[0] access below: there is nothing to decode in an empty
	// string.
	if len(s) == 0 {
		err = errQuoteSyntax
		return
	}

	// easy cases
	switch c := s[0]; {
	case c == quote && (quote == '\'' || quote == '"' || quote == '/' || quote == '|'):
		// an unescaped quote character may not appear inside the literal
		err = errQuoteSyntax
		return
	case c >= utf8.RuneSelf:
		r, size := utf8.DecodeRuneInString(s)
		return r, true, s[size:], nil
	case c != '\\':
		return rune(s[0]), false, s[1:], nil
	}

	// hard case: c is backslash
	if len(s) <= 1 {
		err = errQuoteSyntax
		return
	}
	c := s[1]
	s = s[2:]

	switch c {
	case 'a':
		value = '\a'
	case 'b':
		value = '\b'
	case 'f':
		value = '\f'
	case 'n':
		value = '\n'
	case 'r':
		value = '\r'
	case 't':
		value = '\t'
	case 'v':
		value = '\v'
	case 'x', 'u', 'U':
		// number of hexadecimal digits the escape requires
		n := 0
		switch c {
		case 'x':
			n = 2
		case 'u':
			n = 4
		case 'U':
			n = 8
		}
		var v rune
		if len(s) < n {
			err = errQuoteSyntax
			return
		}
		for j := 0; j < n; j++ {
			x, ok := unhex(s[j])
			if !ok {
				err = errQuoteSyntax
				return
			}
			v = v<<4 | x
		}
		s = s[n:]
		if c == 'x' {
			// single-byte string, possibly not UTF-8
			value = v
			break
		}
		// Eight hex digits can overflow the rune into a negative value, and
		// four can name a surrogate half; neither is a valid code point, so
		// an upper-bound comparison alone is not sufficient.
		if !utf8.ValidRune(v) {
			err = errQuoteSyntax
			return
		}
		value = v
		multibyte = true
	case '0', '1', '2', '3', '4', '5', '6', '7':
		v := rune(c) - '0'
		if len(s) < 2 {
			err = errQuoteSyntax
			return
		}
		for j := 0; j < 2; j++ { // one digit already; two more
			x := rune(s[j]) - '0'
			if x < 0 || x > 7 {
				err = errQuoteSyntax
				return
			}
			v = (v << 3) | x
		}
		s = s[2:]
		if v > 255 {
			err = errQuoteSyntax
			return
		}
		value = v
	case '\\':
		value = '\\'
	case '\'', '"', '|', '/':
		// only the quote character delimiting this literal may be escaped
		if c != quote {
			err = errQuoteSyntax
			return
		}
		value = rune(c)
	default:
		err = errQuoteSyntax
		return
	}
	tail = s
	return
}
// unquote interprets s as a quoted literal and returns the string value that
// s quotes. Single-quoted, double-quoted and backquoted literals follow Go
// syntax (a single-quoted literal must hold exactly one character).
//
// This is modified from the standard library to additionally accept `|` and
// `/` as quote characters, treated like double quotes, for use with regular
// expressions.
func unquote(s string) (string, error) {
	if len(s) < 2 {
		return "", errQuoteSyntax
	}
	quote, last := s[0], s[len(s)-1]
	if quote != last {
		return "", errQuoteSyntax
	}
	body := s[1 : len(s)-1]

	switch quote {
	case '`':
		// Raw literal: no escapes; only carriage returns are dropped.
		if contains(body, '`') {
			return "", errQuoteSyntax
		}
		if !contains(body, '\r') {
			return body, nil
		}
		// -1 because we know there is at least one \r to remove.
		stripped := make([]byte, 0, len(body)-1)
		for i := 0; i < len(body); i++ {
			if b := body[i]; b != '\r' {
				stripped = append(stripped, b)
			}
		}
		return string(stripped), nil
	case '"', '\'', '|', '/':
		// interpreted literal, handled below
	default:
		return "", errQuoteSyntax
	}

	if contains(body, '\n') {
		return "", errQuoteSyntax
	}

	// Is it trivial? Avoid allocation.
	if !contains(body, '\\') && !contains(body, quote) {
		if quote != '\'' {
			// double quote, pipe and slash need no further validation
			return body, nil
		}
		r, size := utf8.DecodeRuneInString(body)
		if size == len(body) && (r != utf8.RuneError || size != 1) {
			return body, nil
		}
	}

	var encoded [utf8.UTFMax]byte
	out := make([]byte, 0, 3*len(body)/2) // Try to avoid more allocations.
	for len(body) > 0 {
		c, multibyte, rest, err := unquoteChar(body, quote)
		if err != nil {
			return "", err
		}
		body = rest

		if multibyte && c >= utf8.RuneSelf {
			n := utf8.EncodeRune(encoded[:], c)
			out = append(out, encoded[:n]...)
		} else {
			out = append(out, byte(c))
		}

		if quote == '\'' && len(body) != 0 {
			// single-quoted must be single character
			return "", errQuoteSyntax
		}
	}
	return string(out), nil
}
// contains reports whether the byte c occurs anywhere in s.
func contains(s string, c byte) bool {
	for _, b := range []byte(s) {
		if b == c {
			return true
		}
	}
	return false
}
// unhex converts the hexadecimal digit b to its numeric value. ok is false,
// and v zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	digit := rune(b)
	if digit >= '0' && digit <= '9' {
		return digit - '0', true
	}
	if digit >= 'a' && digit <= 'f' {
		return digit - 'a' + 10, true
	}
	if digit >= 'A' && digit <= 'F' {
		return digit - 'A' + 10, true
	}
	return 0, false
}

297
pkg/filters/scanner.go Normal file
View File

@@ -0,0 +1,297 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filters
import (
"unicode"
"unicode/utf8"
)
// Token kinds produced by the scanner. They are negative so that they can
// never collide with a rune read from the input; they are left untyped so
// they can be used both as rune and as token values.
const (
	tokenEOF = -(iota + 1)
	tokenQuoted
	tokenValue
	tokenField
	tokenSeparator
	tokenOperator
	tokenIllegal
)

// token is either one of the token kinds above or a literal rune.
type token rune

// String returns the name of a token kind, or the rune itself as a string
// for any other value.
func (t token) String() string {
	// The kinds are negative, so the table is indexed by the negated kind;
	// index 0 is unused.
	names := [...]string{
		-tokenEOF:       "EOF",
		-tokenQuoted:    "Quoted",
		-tokenValue:     "Value",
		-tokenField:     "Field",
		-tokenSeparator: "Separator",
		-tokenOperator:  "Operator",
		-tokenIllegal:   "Illegal",
	}

	if idx := -int(t); idx > 0 && idx < len(names) {
		return names[idx]
	}

	return string(rune(t))
}

// GoString returns the String form prefixed with "token".
func (t token) GoString() string {
	name := t.String()
	return "token" + name
}
// scanner splits a filter expression into tokens.
type scanner struct {
	input string
	pos   int    // byte offset at which the most recently read rune starts
	ppos  int    // byte offset just past the most recently read rune; bounds the current rune in the string
	value bool   // set after an operator so that the following token is scanned as a value
	err   string // first error message recorded while scanning, if any
}

// init prepares the scanner to tokenize input from its beginning. All state
// is reset, including the pending-value flag and any recorded error, so a
// scanner can safely be initialized more than once.
func (s *scanner) init(input string) {
	*s = scanner{input: input}
}
// next reads the rune at the read position and advances past it. It returns
// tokenEOF once the input is exhausted, and tokenIllegal (recording an error)
// for an undecodable rune or a null character.
func (s *scanner) next() rune {
	if s.pos >= len(s.input) {
		return tokenEOF
	}

	s.pos = s.ppos
	r, width := utf8.DecodeRuneInString(s.input[s.ppos:])
	s.ppos += width

	switch {
	case r == utf8.RuneError && width > 0:
		s.error("rune error")
		return tokenIllegal
	case r == utf8.RuneError:
		// zero width: nothing was left to decode
		return tokenEOF
	case r == 0:
		s.error("unexpected null")
		return tokenIllegal
	}

	return r
}
// peek returns the rune that next would return, then restores the scanner's
// position so that the rune is not consumed.
func (s *scanner) peek() rune {
	savedPos, savedPpos := s.pos, s.ppos

	r := s.next()

	s.pos, s.ppos = savedPos, savedPpos
	return r
}
// scan skips leading whitespace and reads the next token. It returns the byte
// offset at which the token starts, its kind and its text. For end of input,
// an illegal rune, or a rune that starts no known token, the text is empty
// and the kind is the rune itself.
func (s *scanner) scan() (nextp int, tk token, text string) {
	ch := s.next()
	// chomp whitespace
	for unicode.IsSpace(ch) {
		ch = s.next()
	}
	pos := s.pos

	if ch == tokenEOF || ch == tokenIllegal {
		return s.pos, token(ch), ""
	}

	var kind token
	switch {
	case isQuoteRune(ch):
		kind = tokenQuoted
		if !s.scanQuoted(ch) {
			kind = tokenIllegal
		}
	case isSeparatorRune(ch):
		s.value = false
		kind = tokenSeparator
	case isOperatorRune(ch):
		s.scanOperator()
		s.value = true // whatever follows an operator is a value
		kind = tokenOperator
	case s.value:
		s.scanValue()
		s.value = false
		kind = tokenValue
	case isFieldRune(ch):
		s.scanField()
		kind = tokenField
	default:
		return s.pos, token(ch), ""
	}

	return pos, kind, s.input[pos:s.ppos]
}
// scanField consumes runes for as long as the next one is a field rune.
func (s *scanner) scanField() {
	for isFieldRune(s.peek()) {
		s.next()
	}
}
// scanOperator consumes runes for as long as the next one is '=', '!' or '~'.
func (s *scanner) scanOperator() {
	for {
		if next := s.peek(); next != '=' && next != '!' && next != '~' {
			return
		}
		s.next()
	}
}
// scanValue consumes runes for as long as the next one is a value rune.
func (s *scanner) scanValue() {
	for isValueRune(s.peek()) {
		s.next()
	}
}
// scanQuoted consumes a quoted literal up to and including the closing quote
// rune; the opening quote has already been read. It returns false when the
// literal is not terminated or contains an illegal escape sequence.
func (s *scanner) scanQuoted(quote rune) bool {
	legal := true

	ch := s.next() // read character after quote
	for ch != quote {
		switch {
		case ch == '\n' || ch < 0:
			s.error("quoted literal not terminated")
			return false
		case ch == '\\':
			var ok bool
			ch, ok = s.scanEscape(quote)
			// keep scanning to the closing quote, but remember the failure
			legal = legal && ok
		default:
			ch = s.next()
		}
	}

	return legal
}
// scanEscape consumes the escape sequence that follows a backslash inside a
// literal delimited by quote. It returns the rune following the sequence and
// whether the sequence was legal. For an unknown escape the error is recorded
// and the offending rune is returned without reading further.
func (s *scanner) scanEscape(quote rune) (ch rune, legal bool) {
	esc := s.next() // read character after '\\'

	switch esc {
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
		// single character escape, nothing more to validate
		return s.next(), true
	case '0', '1', '2', '3', '4', '5', '6', '7':
		return s.scanDigits(esc, 8, 3)
	case 'x':
		return s.scanDigits(s.next(), 16, 2)
	case 'u':
		return s.scanDigits(s.next(), 16, 4)
	case 'U':
		return s.scanDigits(s.next(), 16, 8)
	}

	s.error("illegal escape sequence")
	return esc, false
}
// scanDigits consumes up to n digits of the given base, starting with ch. It
// returns the rune following the consumed digits and whether all n digits
// were present; a short sequence records an error.
func (s *scanner) scanDigits(ch rune, base, n int) (rune, bool) {
	remaining := n
	for ; remaining > 0 && digitVal(ch) < base; remaining-- {
		ch = s.next()
	}

	if remaining <= 0 {
		return ch, true
	}

	s.error("illegal numeric escape sequence")
	return ch, false
}
// error records msg as the scanner's error unless one is already recorded, so
// only the first error is kept.
func (s *scanner) error(msg string) {
	if s.err != "" {
		return
	}
	s.err = msg
}
// digitVal returns the numeric value of the hexadecimal digit ch, or 16 when
// ch is not a hexadecimal digit.
func digitVal(ch rune) int {
	if ch >= '0' && ch <= '9' {
		return int(ch - '0')
	}
	if ch >= 'a' && ch <= 'f' {
		return int(ch-'a') + 10
	}
	if ch >= 'A' && ch <= 'F' {
		return int(ch-'A') + 10
	}
	return 16 // larger than any legal digit val
}
// isFieldRune reports whether r may appear in an unquoted field: an
// underscore, an ASCII letter or an ASCII digit.
func isFieldRune(r rune) bool {
	switch {
	case r == '_', isAlphaRune(r), isDigitRune(r):
		return true
	}
	return false
}

// isAlphaRune reports whether r is an ASCII letter.
func isAlphaRune(r rune) bool {
	upper := 'A' <= r && r <= 'Z'
	lower := 'a' <= r && r <= 'z'
	return upper || lower
}

// isDigitRune reports whether r is an ASCII decimal digit.
func isDigitRune(r rune) bool {
	return '0' <= r && r <= '9'
}
// isOperatorRune reports whether r is one of the runes operators are built
// from: '=', '!' or '~'.
func isOperatorRune(r rune) bool {
	return r == '=' || r == '!' || r == '~'
}
// isQuoteRune reports whether r opens a quoted literal: '/', '|' or '"'.
func isQuoteRune(r rune) bool {
	// maybe add single quoting?
	return r == '/' || r == '|' || r == '"'
}
// isSeparatorRune reports whether r separates selectors (',') or field path
// components ('.').
func isSeparatorRune(r rune) bool {
	return r == ',' || r == '.'
}
// isValueRune reports whether r may appear in an unquoted value: any letter,
// digit, number, graphic or punctuation rune other than a comma or
// whitespace.
func isValueRune(r rune) bool {
	if r == ',' || unicode.IsSpace(r) {
		return false
	}

	switch {
	case unicode.IsLetter(r),
		unicode.IsDigit(r),
		unicode.IsNumber(r),
		unicode.IsGraphic(r),
		unicode.IsPunct(r):
		return true
	}
	return false
}

374
pkg/filters/scanner_test.go Normal file
View File

@@ -0,0 +1,374 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filters
import (
"fmt"
"testing"
)
// tokenResult is one token expected from, or produced by, the scanner: its
// byte offset, kind and text, plus the scanner error message, if any.
type tokenResult struct {
	pos   int
	token token
	text  string
	err   string
}

// String formats the result for test output; the error is included only when
// one is set.
func (tr tokenResult) String() string {
	if tr.err == "" {
		return fmt.Sprintf("{pos: %v, token: %v, text: %q}", tr.pos, tr.token, tr.text)
	}
	return fmt.Sprintf("{pos: %v, token: %v, text: %q, err: %q}", tr.pos, tr.token, tr.text, tr.err)
}
// TestScanner feeds each input through the scanner and checks that the
// sequence of (position, token, text) results, plus the scanner error for
// illegal tokens, matches the expected table exactly. Positions are byte
// offsets into the input, as the multi-byte "Cowsay" cases show.
func TestScanner(t *testing.T) {
	for _, testcase := range []struct {
		name     string
		input    string
		expected []tokenResult
	}{
		{
			name:  "Field",
			input: "name",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenEOF},
			},
		},
		{
			name:  "SelectorsWithOperators",
			input: "name==value,foo!=bar",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "=="},
				{pos: 6, token: tokenValue, text: "value"},
				{pos: 11, token: tokenSeparator, text: ","},
				{pos: 12, token: tokenField, text: "foo"},
				{pos: 15, token: tokenOperator, text: "!="},
				{pos: 17, token: tokenValue, text: "bar"},
				{pos: 20, token: tokenEOF},
			},
		},
		{
			name:  "SelectorsWithFieldPaths",
			input: "name==value,labels.foo=value,other.bar~=match",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "=="},
				{pos: 6, token: tokenValue, text: "value"},
				{pos: 11, token: tokenSeparator, text: ","},
				{pos: 12, token: tokenField, text: "labels"},
				{pos: 18, token: tokenSeparator, text: "."},
				{pos: 19, token: tokenField, text: "foo"},
				{pos: 22, token: tokenOperator, text: "="},
				{pos: 23, token: tokenValue, text: "value"},
				{pos: 28, token: tokenSeparator, text: ","},
				{pos: 29, token: tokenField, text: "other"},
				{pos: 34, token: tokenSeparator, text: "."},
				{pos: 35, token: tokenField, text: "bar"},
				{pos: 38, token: tokenOperator, text: "~="},
				{pos: 40, token: tokenValue, text: "match"},
				{pos: 45, token: tokenEOF},
			},
		},
		{
			name:  "RegexpValue",
			input: "name~=[abc]+,foo=test",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenValue, text: "[abc]+"},
				{pos: 12, token: tokenSeparator, text: ","},
				{pos: 13, token: tokenField, text: "foo"},
				{pos: 16, token: tokenOperator, text: "="},
				{pos: 17, token: tokenValue, text: "test"},
				{pos: 21, token: tokenEOF},
			},
		},
		{
			name:  "RegexpEscapedValue",
			input: `name~=[abc]\+,foo=test`,
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenValue, text: "[abc]\\+"},
				{pos: 13, token: tokenSeparator, text: ","},
				{pos: 14, token: tokenField, text: "foo"},
				{pos: 17, token: tokenOperator, text: "="},
				{pos: 18, token: tokenValue, text: "test"},
				{pos: 22, token: tokenEOF},
			},
		},
		{
			name:  "RegexpQuotedValue",
			input: `name~=/[abc]{0,2}/,foo=test`,
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenQuoted, text: "/[abc]{0,2}/"},
				{pos: 18, token: tokenSeparator, text: ","},
				{pos: 19, token: tokenField, text: "foo"},
				{pos: 22, token: tokenOperator, text: "="},
				{pos: 23, token: tokenValue, text: "test"},
				{pos: 27, token: tokenEOF},
			},
		},
		{
			name:  "Cowsay",
			input: "name~=牛,labels.moo=true",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenValue, text: "牛"},
				{pos: 9, token: tokenSeparator, text: ","},
				{pos: 10, token: tokenField, text: "labels"},
				{pos: 16, token: tokenSeparator, text: "."},
				{pos: 17, token: tokenField, text: "moo"},
				{pos: 20, token: tokenOperator, text: "="},
				{pos: 21, token: tokenValue, text: "true"},
				{pos: 25, token: tokenEOF},
			},
		},
		{
			name:  "CowsayRegexpQuoted",
			input: "name~=|牛|,labels.moo=true",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenQuoted, text: "|牛|"},
				{pos: 11, token: tokenSeparator, text: ","},
				{pos: 12, token: tokenField, text: "labels"},
				{pos: 18, token: tokenSeparator, text: "."},
				{pos: 19, token: tokenField, text: "moo"},
				{pos: 22, token: tokenOperator, text: "="},
				{pos: 23, token: tokenValue, text: "true"},
				{pos: 27, token: tokenEOF},
			},
		},
		{
			name:  "Escapes",
			input: `name~="asdf\n\tfooo"`,
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenQuoted, text: "\"asdf\\n\\tfooo\""},
				{pos: 20, token: tokenEOF},
			},
		},
		{
			name:  "NullInput",
			input: "foo\x00bar",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "foo"},
				{pos: 3, token: tokenIllegal, err: "unexpected null"},
				{pos: 4, token: tokenField, text: "bar"},
				{pos: 7, token: tokenEOF},
			},
		},
		{
			name: "SpacesChomped",
			// The input carries four trailing spaces so that its length (13
			// bytes) matches the expected EOF position; the trailing
			// whitespace must be skipped without producing a token.
			input: "foo = bar    ",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "foo"},
				{pos: 4, token: tokenOperator, text: "="},
				{pos: 6, token: tokenValue, text: "bar"},
				{pos: 13, token: tokenEOF},
			},
		},
		{
			name:  "ValuesPunctauted",
			input: "compound.labels==punctuated_value.foo-bar",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "compound"},
				{pos: 8, token: tokenSeparator, text: "."},
				{pos: 9, token: tokenField, text: "labels"},
				{pos: 15, token: tokenOperator, text: "=="},
				{pos: 17, token: tokenValue, text: "punctuated_value.foo-bar"},
				{pos: 41, token: tokenEOF},
			},
		},
		{
			name:  "PartialInput",
			input: "interrupted=",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "interrupted"},
				{pos: 11, token: tokenOperator, text: "="},
				{pos: 12, token: tokenEOF},
			},
		},
		{
			name:  "DoubleValue",
			input: "doublevalue=value value",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "doublevalue"},
				{pos: 11, token: tokenOperator, text: "="},
				{pos: 12, token: tokenValue, text: "value"},
				{pos: 18, token: tokenField, text: "value"},
				{pos: 23, token: tokenEOF},
			},
		},
		{
			name:  "LeadingWithQuoted",
			input: `"leading quote".postquote==value`,
			expected: []tokenResult{
				{pos: 0, token: tokenQuoted, text: "\"leading quote\""},
				{pos: 15, token: tokenSeparator, text: "."},
				{pos: 16, token: tokenField, text: "postquote"},
				{pos: 25, token: tokenOperator, text: "=="},
				{pos: 27, token: tokenValue, text: "value"},
				{pos: 32, token: tokenEOF},
			},
		},
		{
			name:  "MissingValue",
			input: "input==,id!=ff",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "input"},
				{pos: 5, token: tokenOperator, text: "=="},
				{pos: 7, token: tokenSeparator, text: ","},
				{pos: 8, token: tokenField, text: "id"},
				{pos: 10, token: tokenOperator, text: "!="},
				{pos: 12, token: tokenValue, text: "ff"},
				{pos: 14, token: tokenEOF},
			},
		},
		{
			name:  "QuotedRegexp",
			input: "input~=/foo\\/bar/,id!=ff",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "input"},
				{pos: 5, token: tokenOperator, text: "~="},
				{pos: 7, token: tokenQuoted, text: "/foo\\/bar/"},
				{pos: 17, token: tokenSeparator, text: ","},
				{pos: 18, token: tokenField, text: "id"},
				{pos: 20, token: tokenOperator, text: "!="},
				{pos: 22, token: tokenValue, text: "ff"},
				{pos: 24, token: tokenEOF},
			},
		},
		{
			name:  "QuotedRegexpAlt",
			input: "input~=|foo/bar|,id!=ff",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "input"},
				{pos: 5, token: tokenOperator, text: "~="},
				{pos: 7, token: tokenQuoted, text: "|foo/bar|"},
				{pos: 16, token: tokenSeparator, text: ","},
				{pos: 17, token: tokenField, text: "id"},
				{pos: 19, token: tokenOperator, text: "!="},
				{pos: 21, token: tokenValue, text: "ff"},
				{pos: 23, token: tokenEOF},
			},
		},
		{
			name:  "IllegalQuoted",
			input: "labels.containerd.io/key==value",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "labels"},
				{pos: 6, token: tokenSeparator, text: "."},
				{pos: 7, token: tokenField, text: "containerd"},
				{pos: 17, token: tokenSeparator, text: "."},
				{pos: 18, token: tokenField, text: "io"},
				{pos: 20, token: tokenIllegal, text: "/key==value", err: "quoted literal not terminated"},
				{pos: 31, token: tokenEOF},
			},
		},
		{
			name:  "IllegalQuotedWithNewLine",
			input: "labels.\"containerd.io\nkey\"==value",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "labels"},
				{pos: 6, token: tokenSeparator, text: "."},
				{pos: 7, token: tokenIllegal, text: "\"containerd.io\n", err: "quoted literal not terminated"},
				{pos: 22, token: tokenField, text: "key"},
				{pos: 25, token: tokenIllegal, text: "\"==value", err: "quoted literal not terminated"},
				{pos: 33, token: tokenEOF},
			},
		},
		{
			name:  "IllegalEscapeSequence",
			input: `labels."\g"`,
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "labels"},
				{pos: 6, token: tokenSeparator, text: "."},
				{pos: 7, token: tokenIllegal, text: `"\g"`, err: "illegal escape sequence"},
				{pos: 11, token: tokenEOF},
			},
		},
		{
			name:  "IllegalNumericEscapeSequence",
			input: `labels."\xaz"`,
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "labels"},
				{pos: 6, token: tokenSeparator, text: "."},
				{pos: 7, token: tokenIllegal, text: `"\xaz"`, err: "illegal numeric escape sequence"},
				{pos: 13, token: tokenEOF},
			},
		},
	} {
		t.Run(testcase.name, func(t *testing.T) {
			var sc scanner
			sc.init(testcase.input)

			// If you leave the expected empty, the test case will just print
			// out the token stream, which you can paste into the testcase when
			// adding new cases.
			if len(testcase.expected) == 0 {
				fmt.Println("Name", testcase.name)
			}

			// Scan until EOF, comparing each result with the table entry at
			// the same index.
			for i := 0; ; i++ {
				pos, tok, s := sc.scan()
				if len(testcase.expected) == 0 {
					if len(s) > 0 {
						fmt.Printf("{pos: %v, token: %#v, text: %q},\n", pos, tok, s)
					} else {
						fmt.Printf("{pos: %v, token: %#v},\n", pos, tok)
					}
				} else {
					tokv := tokenResult{pos: pos, token: tok, text: s}
					if i >= len(testcase.expected) {
						t.Fatal("too many tokens parsed")
					}
					// The scanner error is only meaningful for illegal
					// tokens, so it is only compared for those.
					if tok == tokenIllegal {
						tokv.err = sc.err
					}

					if tokv != testcase.expected[i] {
						t.Fatalf("token unexpected: %v != %v", tokv, testcase.expected[i])
					}
				}

				if tok == tokenEOF {
					break
				}
			}

			// make sure we've eof'd
			_, tok, _ := sc.scan()
			if tok != tokenEOF {
				t.Fatal("must consume all input")
			}

			// An empty expectation is only a tool for generating the table;
			// it must never pass.
			if len(testcase.expected) == 0 {
				t.Fatal("must define expected tokens")
			}
		})
	}
}