diff --git a/filters/filter_test.go b/filters/filter_test.go index e166ed916..5eddd56a1 100644 --- a/filters/filter_test.go +++ b/filters/filter_test.go @@ -135,6 +135,11 @@ func TestFilters(t *testing.T) { corpus[8], }, }, + { + name: "LabelValueNoAltQuoting", + input: "labels.|foo|==omg_asdf.asdf-qwer", + errString: "filters: parse error: [labels. >|||< foo|==omg_asdf.asdf-qwer]: invalid quote encountered", + }, { name: "Name", input: "name==bar", @@ -178,6 +183,27 @@ func TestFilters(t *testing.T) { corpus[7], }, }, + { + name: "RegexpQuotedValue", + input: "other~=/[abc]+/,name!=foo", + expected: []interface{}{ + corpus[6], + corpus[7], + }, + }, + { + name: "RegexpQuotedValue", + input: "other~=/[abc]{1,2}/,name!=foo", + expected: []interface{}{ + corpus[6], + corpus[7], + }, + }, + { + name: "RegexpQuotedValueGarbage", + input: "other~=/[abc]{0,1}\"\\//,name!=foo", + // valid syntax, but doesn't match anything + }, { name: "NameAndLabelValue", input: "name==bar,labels.bar==true", diff --git a/filters/parser.go b/filters/parser.go index c9b09847b..c765ea00c 100644 --- a/filters/parser.go +++ b/filters/parser.go @@ -3,7 +3,6 @@ package filters import ( "fmt" "io" - "strconv" "github.com/containerd/containerd/errdefs" "github.com/pkg/errors" @@ -134,7 +133,12 @@ func (p *parser) selector() (selector, error) { return selector{}, err } - value, err := p.value() + var allowAltQuotes bool + if op == operatorMatches { + allowAltQuotes = true + } + + value, err := p.value(allowAltQuotes) if err != nil { if err == io.EOF { return selector{}, io.ErrUnexpectedEOF @@ -188,7 +192,7 @@ func (p *parser) field() (string, error) { case tokenField: return s, nil case tokenQuoted: - return p.unquote(pos, s) + return p.unquote(pos, s, false) } return "", p.mkerr(pos, "expected field or quoted") @@ -213,21 +217,25 @@ func (p *parser) operator() (operator, error) { return 0, p.mkerr(pos, `expected an operator ("=="|"!="|"~=")`) } -func (p *parser) value() (string, error) { +func (p *parser) value(allowAltQuotes bool) (string, error) { pos, tok, s := p.scanner.scan() switch tok { case tokenValue, tokenField: return s, nil case tokenQuoted: - return p.unquote(pos, s) + return p.unquote(pos, s, allowAltQuotes) } return "", p.mkerr(pos, "expected value or quoted") } -func (p *parser) unquote(pos int, s string) (string, error) { - uq, err := strconv.Unquote(s) +func (p *parser) unquote(pos int, s string, allowAlts bool) (string, error) { + if !allowAlts && s[0] != '\'' && s[0] != '"' { + return "", p.mkerr(pos, "invalid quote encountered") + } + + uq, err := unquote(s) if err != nil { return "", p.mkerr(pos, "unquoting failed: %v", err) } diff --git a/filters/quote.go b/filters/quote.go new file mode 100644 index 000000000..08698e1ba --- /dev/null +++ b/filters/quote.go @@ -0,0 +1,237 @@ +package filters + +import ( + "unicode/utf8" + + "github.com/pkg/errors" +) + +// NOTE(stevvooe): Most of this code in this file is copied from the stdlib +// strconv package and modified to be able to handle quoting with `/` and `|` +// as delimiters. The copyright is held by the Go authors. + +var errQuoteSyntax = errors.New("quote syntax error") + +// UnquoteChar decodes the first character or byte in the escaped string +// or character literal represented by the string s. +// It returns four values: +// +// 1) value, the decoded Unicode code point or byte value; +// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; +// 3) tail, the remainder of the string after the character; and +// 4) an error that will be nil if the character is syntactically valid. +// +// The second argument, quote, specifies the type of literal being parsed +// and therefore which escaped quote character is permitted. +// If set to a single quote, it permits the sequence \' and disallows unescaped '. +// If set to a double quote, it permits \" and disallows unescaped ". +// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. +// +// This is from Go strconv package, modified to support `|` and `/` as double +// quotes for use with regular expressions. +func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { + // easy cases + switch c := s[0]; { + case c == quote && (quote == '\'' || quote == '"' || quote == '/' || quote == '|'): + err = errQuoteSyntax + return + case c >= utf8.RuneSelf: + r, size := utf8.DecodeRuneInString(s) + return r, true, s[size:], nil + case c != '\\': + return rune(s[0]), false, s[1:], nil + } + + // hard case: c is backslash + if len(s) <= 1 { + err = errQuoteSyntax + return + } + c := s[1] + s = s[2:] + + switch c { + case 'a': + value = '\a' + case 'b': + value = '\b' + case 'f': + value = '\f' + case 'n': + value = '\n' + case 'r': + value = '\r' + case 't': + value = '\t' + case 'v': + value = '\v' + case 'x', 'u', 'U': + n := 0 + switch c { + case 'x': + n = 2 + case 'u': + n = 4 + case 'U': + n = 8 + } + var v rune + if len(s) < n { + err = errQuoteSyntax + return + } + for j := 0; j < n; j++ { + x, ok := unhex(s[j]) + if !ok { + err = errQuoteSyntax + return + } + v = v<<4 | x + } + s = s[n:] + if c == 'x' { + // single-byte string, possibly not UTF-8 + value = v + break + } + if v > utf8.MaxRune { + err = errQuoteSyntax + return + } + value = v + multibyte = true + case '0', '1', '2', '3', '4', '5', '6', '7': + v := rune(c) - '0' + if len(s) < 2 { + err = errQuoteSyntax + return + } + for j := 0; j < 2; j++ { // one digit already; two more + x := rune(s[j]) - '0' + if x < 0 || x > 7 { + err = errQuoteSyntax + return + } + v = (v << 3) | x + } + s = s[2:] + if v > 255 { + err = errQuoteSyntax + return + } + value = v + case '\\': + value = '\\' + case '\'', '"', '|', '/': + if c != quote { + err = errQuoteSyntax + return + } + value = rune(c) + default: + err = errQuoteSyntax + return + } + tail = s + return +} + +// unquote interprets s as a single-quoted, double-quoted, +// or backquoted Go string literal, returning the string value +// that s quotes. (If s is single-quoted, it would be a Go +// character literal; Unquote returns the corresponding +// one-character string.) +// +// This is modified from the standard library to support `|` and `/` as quote +// characters for use with regular expressions. +func unquote(s string) (string, error) { + n := len(s) + if n < 2 { + return "", errQuoteSyntax + } + quote := s[0] + if quote != s[n-1] { + return "", errQuoteSyntax + } + s = s[1 : n-1] + + if quote == '`' { + if contains(s, '`') { + return "", errQuoteSyntax + } + if contains(s, '\r') { + // -1 because we know there is at least one \r to remove. + buf := make([]byte, 0, len(s)-1) + for i := 0; i < len(s); i++ { + if s[i] != '\r' { + buf = append(buf, s[i]) + } + } + return string(buf), nil + } + return s, nil + } + if quote != '"' && quote != '\'' && quote != '|' && quote != '/' { + return "", errQuoteSyntax + } + if contains(s, '\n') { + return "", errQuoteSyntax + } + + // Is it trivial? Avoid allocation. + if !contains(s, '\\') && !contains(s, quote) { + switch quote { + case '"', '/', '|': // pipe and slash are treated like double quote + return s, nil + case '\'': + r, size := utf8.DecodeRuneInString(s) + if size == len(s) && (r != utf8.RuneError || size != 1) { + return s, nil + } + } + } + + var runeTmp [utf8.UTFMax]byte + buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. + for len(s) > 0 { + c, multibyte, ss, err := unquoteChar(s, quote) + if err != nil { + return "", err + } + s = ss + if c < utf8.RuneSelf || !multibyte { + buf = append(buf, byte(c)) + } else { + n := utf8.EncodeRune(runeTmp[:], c) + buf = append(buf, runeTmp[:n]...) + } + if quote == '\'' && len(s) != 0 { + // single-quoted must be single character + return "", errQuoteSyntax + } + } + return string(buf), nil +} + +// contains reports whether the string contains the byte c. +func contains(s string, c byte) bool { + for i := 0; i < len(s); i++ { + if s[i] == c { + return true + } + } + return false +} + +func unhex(b byte) (v rune, ok bool) { + c := rune(b) + switch { + case '0' <= c && c <= '9': + return c - '0', true + case 'a' <= c && c <= 'f': + return c - 'a' + 10, true + case 'A' <= c && c <= 'F': + return c - 'A' + 10, true + } + return +} diff --git a/filters/scanner.go b/filters/scanner.go index 5a55e0abf..3a8e72395 100644 --- a/filters/scanner.go +++ b/filters/scanner.go @@ -87,7 +87,7 @@ func (s *scanner) peek() rune { return ch } -func (s *scanner) scan() (int, token, string) { +func (s *scanner) scan() (nextp int, tk token, text string) { var ( ch = s.next() pos = s.pos @@ -101,6 +101,7 @@ chomp: s.scanQuoted(ch) return pos, tokenQuoted, s.input[pos:s.ppos] case isSeparatorRune(ch): + s.value = false return pos, tokenSeparator, s.input[pos:s.ppos] case isOperatorRune(ch): s.scanOperator() @@ -241,7 +242,7 @@ func isOperatorRune(r rune) bool { func isQuoteRune(r rune) bool { switch r { - case '"': // maybe add single quoting? + case '/', '|', '"': // maybe add single quoting? return true } diff --git a/filters/scanner_test.go b/filters/scanner_test.go index 27b1a899b..2b7cd5336 100644 --- a/filters/scanner_test.go +++ b/filters/scanner_test.go @@ -95,6 +95,20 @@ func TestScanner(t *testing.T) { {pos: 22, token: tokenEOF}, }, }, + { + name: "RegexpQuotedValue", + input: `name~=/[abc]{0,2}/,foo=test`, + expected: []tokenResult{ + {pos: 0, token: tokenField, text: "name"}, + {pos: 4, token: tokenOperator, text: "~="}, + {pos: 6, token: tokenQuoted, text: "/[abc]{0,2}/"}, + {pos: 18, token: tokenSeparator, text: ","}, + {pos: 19, token: tokenField, text: "foo"}, + {pos: 22, token: tokenOperator, text: "="}, + {pos: 23, token: tokenValue, text: "test"}, + {pos: 27, token: tokenEOF}, + }, + }, { name: "Cowsay", input: "name~=牛,labels.moo=true", @@ -111,6 +125,22 @@ func TestScanner(t *testing.T) { {pos: 25, token: tokenEOF}, }, }, + { + name: "CowsayRegexpQuoted", + input: "name~=|牛|,labels.moo=true", + expected: []tokenResult{ + {pos: 0, token: tokenField, text: "name"}, + {pos: 4, token: tokenOperator, text: "~="}, + {pos: 6, token: tokenQuoted, text: "|牛|"}, + {pos: 11, token: tokenSeparator, text: ","}, + {pos: 12, token: tokenField, text: "labels"}, + {pos: 18, token: tokenSeparator, text: "."}, + {pos: 19, token: tokenField, text: "moo"}, + {pos: 22, token: tokenOperator, text: "="}, + {pos: 23, token: tokenValue, text: "true"}, + {pos: 27, token: tokenEOF}, + }, + }, { name: "Escapes", input: `name~="asdf\n\tfooo"`, @@ -187,15 +217,45 @@ func TestScanner(t *testing.T) { }, { name: "MissingValue", - input: "input==,id?=ff", + input: "input==,id!=ff", expected: []tokenResult{ {pos: 0, token: tokenField, text: "input"}, {pos: 5, token: tokenOperator, text: "=="}, {pos: 7, token: tokenSeparator, text: ","}, - {pos: 8, token: tokenValue, text: "id?=ff"}, + {pos: 8, token: tokenField, text: "id"}, + {pos: 10, token: tokenOperator, text: "!="}, + {pos: 12, token: tokenValue, text: "ff"}, {pos: 14, token: tokenEOF}, }, }, + { + name: "QuotedRegexp", + input: "input~=/foo\\/bar/,id!=ff", + expected: []tokenResult{ + {pos: 0, token: tokenField, text: "input"}, + {pos: 5, token: tokenOperator, text: "~="}, + {pos: 7, token: tokenQuoted, text: "/foo\\/bar/"}, + {pos: 17, token: tokenSeparator, text: ","}, + {pos: 18, token: tokenField, text: "id"}, + {pos: 20, token: tokenOperator, text: "!="}, + {pos: 22, token: tokenValue, text: "ff"}, + {pos: 24, token: tokenEOF}, + }, + }, + { + name: "QuotedRegexpAlt", + input: "input~=|foo/bar|,id!=ff", + expected: []tokenResult{ + {pos: 0, token: tokenField, text: "input"}, + {pos: 5, token: tokenOperator, text: "~="}, + {pos: 7, token: tokenQuoted, text: "|foo/bar|"}, + {pos: 16, token: tokenSeparator, text: ","}, + {pos: 17, token: tokenField, text: "id"}, + {pos: 19, token: tokenOperator, text: "!="}, + {pos: 21, token: tokenValue, text: "ff"}, + {pos: 23, token: tokenEOF}, + }, + }, } { t.Run(testcase.name, func(t *testing.T) { var sc scanner