
full diff: https://github.com/pelletier/go-toml/compare/v1.8.1...v1.9.3 - v1.9.3: Clarify license and comply with Apache 2.0 - v1.9.2: Add Encoder.CompactComments to omit extra new line - v1.9.1: Fix empty trees line counting v1.9.0 ------------------- The highlight of this version is that the whole toml.Tree structure has been made public in a backward compatible way. This allows everyone using v1.x to fully access the data and metadata in the tree to extend the library. This is hopefully the last release in the v1.x track, as go-toml v2 is the main focus of development. What's new - TOML 1.0.0-rc.3 - Improved default tag for durations - Provide Tree and treeValue public aliases - Expose MarshalOrder - Value string representation public function Fixed bugs - Do not allow T-prefix on local dates - toml.Unmarshaler supports leaf nodes - Fix date lexer to only support 4-digit year - Fix ToMap for tables in mixed-type arrays - Fix ToMap for tables in nested mixed-type arrays - Support literal multiline marshal Performance - Remove date regexp - Remove underscore regexps Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
1032 lines
19 KiB
Go
1032 lines
19 KiB
Go
// TOML lexer.
|
|
//
|
|
// Written using the principles developed by Rob Pike in
|
|
// http://www.youtube.com/watch?v=HxaD_trXwRE
|
|
|
|
package toml
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// tomlLexStateFn is one state of the lexer's state machine. Calling it runs
// that state and yields the state to execute next; a nil result stops the
// driver loop in run.
type tomlLexStateFn func() tomlLexStateFn
|
|
|
|
// Define lexer
|
|
type tomlLexer struct {
|
|
inputIdx int
|
|
input []rune // Textual source
|
|
currentTokenStart int
|
|
currentTokenStop int
|
|
tokens []token
|
|
brackets []rune
|
|
line int
|
|
col int
|
|
endbufferLine int
|
|
endbufferCol int
|
|
}
|
|
|
|
// Basic read operations on input
|
|
|
|
func (l *tomlLexer) read() rune {
|
|
r := l.peek()
|
|
if r == '\n' {
|
|
l.endbufferLine++
|
|
l.endbufferCol = 1
|
|
} else {
|
|
l.endbufferCol++
|
|
}
|
|
l.inputIdx++
|
|
return r
|
|
}
|
|
|
|
func (l *tomlLexer) next() rune {
|
|
r := l.read()
|
|
|
|
if r != eof {
|
|
l.currentTokenStop++
|
|
}
|
|
return r
|
|
}
|
|
|
|
func (l *tomlLexer) ignore() {
|
|
l.currentTokenStart = l.currentTokenStop
|
|
l.line = l.endbufferLine
|
|
l.col = l.endbufferCol
|
|
}
|
|
|
|
func (l *tomlLexer) skip() {
|
|
l.next()
|
|
l.ignore()
|
|
}
|
|
|
|
func (l *tomlLexer) fastForward(n int) {
|
|
for i := 0; i < n; i++ {
|
|
l.next()
|
|
}
|
|
}
|
|
|
|
func (l *tomlLexer) emitWithValue(t tokenType, value string) {
|
|
l.tokens = append(l.tokens, token{
|
|
Position: Position{l.line, l.col},
|
|
typ: t,
|
|
val: value,
|
|
})
|
|
l.ignore()
|
|
}
|
|
|
|
func (l *tomlLexer) emit(t tokenType) {
|
|
l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
|
|
}
|
|
|
|
func (l *tomlLexer) peek() rune {
|
|
if l.inputIdx >= len(l.input) {
|
|
return eof
|
|
}
|
|
return l.input[l.inputIdx]
|
|
}
|
|
|
|
func (l *tomlLexer) peekString(size int) string {
|
|
maxIdx := len(l.input)
|
|
upperIdx := l.inputIdx + size // FIXME: potential overflow
|
|
if upperIdx > maxIdx {
|
|
upperIdx = maxIdx
|
|
}
|
|
return string(l.input[l.inputIdx:upperIdx])
|
|
}
|
|
|
|
func (l *tomlLexer) follow(next string) bool {
|
|
return next == l.peekString(len(next))
|
|
}
|
|
|
|
// Error management
|
|
|
|
func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
|
|
l.tokens = append(l.tokens, token{
|
|
Position: Position{l.line, l.col},
|
|
typ: tokenError,
|
|
val: fmt.Sprintf(format, args...),
|
|
})
|
|
return nil
|
|
}
|
|
|
|
// State functions
|
|
|
|
func (l *tomlLexer) lexVoid() tomlLexStateFn {
|
|
for {
|
|
next := l.peek()
|
|
switch next {
|
|
case '}': // after '{'
|
|
return l.lexRightCurlyBrace
|
|
case '[':
|
|
return l.lexTableKey
|
|
case '#':
|
|
return l.lexComment(l.lexVoid)
|
|
case '=':
|
|
return l.lexEqual
|
|
case '\r':
|
|
fallthrough
|
|
case '\n':
|
|
l.skip()
|
|
continue
|
|
}
|
|
|
|
if isSpace(next) {
|
|
l.skip()
|
|
}
|
|
|
|
if isKeyStartChar(next) {
|
|
return l.lexKey
|
|
}
|
|
|
|
if next == eof {
|
|
l.next()
|
|
break
|
|
}
|
|
}
|
|
|
|
l.emit(tokenEOF)
|
|
return nil
|
|
}
|
|
|
|
func (l *tomlLexer) lexRvalue() tomlLexStateFn {
|
|
for {
|
|
next := l.peek()
|
|
switch next {
|
|
case '.':
|
|
return l.errorf("cannot start float with a dot")
|
|
case '=':
|
|
return l.lexEqual
|
|
case '[':
|
|
return l.lexLeftBracket
|
|
case ']':
|
|
return l.lexRightBracket
|
|
case '{':
|
|
return l.lexLeftCurlyBrace
|
|
case '}':
|
|
return l.lexRightCurlyBrace
|
|
case '#':
|
|
return l.lexComment(l.lexRvalue)
|
|
case '"':
|
|
return l.lexString
|
|
case '\'':
|
|
return l.lexLiteralString
|
|
case ',':
|
|
return l.lexComma
|
|
case '\r':
|
|
fallthrough
|
|
case '\n':
|
|
l.skip()
|
|
if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '[' {
|
|
return l.lexRvalue
|
|
}
|
|
return l.lexVoid
|
|
}
|
|
|
|
if l.follow("true") {
|
|
return l.lexTrue
|
|
}
|
|
|
|
if l.follow("false") {
|
|
return l.lexFalse
|
|
}
|
|
|
|
if l.follow("inf") {
|
|
return l.lexInf
|
|
}
|
|
|
|
if l.follow("nan") {
|
|
return l.lexNan
|
|
}
|
|
|
|
if isSpace(next) {
|
|
l.skip()
|
|
continue
|
|
}
|
|
|
|
if next == eof {
|
|
l.next()
|
|
break
|
|
}
|
|
|
|
if next == '+' || next == '-' {
|
|
return l.lexNumber
|
|
}
|
|
|
|
if isDigit(next) {
|
|
return l.lexDateTimeOrNumber
|
|
}
|
|
|
|
return l.errorf("no value can start with %c", next)
|
|
}
|
|
|
|
l.emit(tokenEOF)
|
|
return nil
|
|
}
|
|
|
|
func (l *tomlLexer) lexDateTimeOrNumber() tomlLexStateFn {
|
|
// Could be either a date/time, or a digit.
|
|
// The options for date/times are:
|
|
// YYYY-... => date or date-time
|
|
// HH:... => time
|
|
// Anything else should be a number.
|
|
|
|
lookAhead := l.peekString(5)
|
|
if len(lookAhead) < 3 {
|
|
return l.lexNumber()
|
|
}
|
|
|
|
for idx, r := range lookAhead {
|
|
if !isDigit(r) {
|
|
if idx == 2 && r == ':' {
|
|
return l.lexDateTimeOrTime()
|
|
}
|
|
if idx == 4 && r == '-' {
|
|
return l.lexDateTimeOrTime()
|
|
}
|
|
return l.lexNumber()
|
|
}
|
|
}
|
|
return l.lexNumber()
|
|
}
|
|
|
|
func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn {
|
|
l.next()
|
|
l.emit(tokenLeftCurlyBrace)
|
|
l.brackets = append(l.brackets, '{')
|
|
return l.lexVoid
|
|
}
|
|
|
|
func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn {
|
|
l.next()
|
|
l.emit(tokenRightCurlyBrace)
|
|
if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '{' {
|
|
return l.errorf("cannot have '}' here")
|
|
}
|
|
l.brackets = l.brackets[:len(l.brackets)-1]
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) lexDateTimeOrTime() tomlLexStateFn {
|
|
// Example matches:
|
|
// 1979-05-27T07:32:00Z
|
|
// 1979-05-27T00:32:00-07:00
|
|
// 1979-05-27T00:32:00.999999-07:00
|
|
// 1979-05-27 07:32:00Z
|
|
// 1979-05-27 00:32:00-07:00
|
|
// 1979-05-27 00:32:00.999999-07:00
|
|
// 1979-05-27T07:32:00
|
|
// 1979-05-27T00:32:00.999999
|
|
// 1979-05-27 07:32:00
|
|
// 1979-05-27 00:32:00.999999
|
|
// 1979-05-27
|
|
// 07:32:00
|
|
// 00:32:00.999999
|
|
|
|
// we already know those two are digits
|
|
l.next()
|
|
l.next()
|
|
|
|
// Got 2 digits. At that point it could be either a time or a date(-time).
|
|
|
|
r := l.next()
|
|
if r == ':' {
|
|
return l.lexTime()
|
|
}
|
|
|
|
return l.lexDateTime()
|
|
}
|
|
|
|
func (l *tomlLexer) lexDateTime() tomlLexStateFn {
|
|
// This state accepts an offset date-time, a local date-time, or a local date.
|
|
//
|
|
// v--- cursor
|
|
// 1979-05-27T07:32:00Z
|
|
// 1979-05-27T00:32:00-07:00
|
|
// 1979-05-27T00:32:00.999999-07:00
|
|
// 1979-05-27 07:32:00Z
|
|
// 1979-05-27 00:32:00-07:00
|
|
// 1979-05-27 00:32:00.999999-07:00
|
|
// 1979-05-27T07:32:00
|
|
// 1979-05-27T00:32:00.999999
|
|
// 1979-05-27 07:32:00
|
|
// 1979-05-27 00:32:00.999999
|
|
// 1979-05-27
|
|
|
|
// date
|
|
|
|
// already checked by lexRvalue
|
|
l.next() // digit
|
|
l.next() // -
|
|
|
|
for i := 0; i < 2; i++ {
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("invalid month digit in date: %c", r)
|
|
}
|
|
}
|
|
|
|
r := l.next()
|
|
if r != '-' {
|
|
return l.errorf("expected - to separate month of a date, not %c", r)
|
|
}
|
|
|
|
for i := 0; i < 2; i++ {
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("invalid day digit in date: %c", r)
|
|
}
|
|
}
|
|
|
|
l.emit(tokenLocalDate)
|
|
|
|
r = l.peek()
|
|
|
|
if r == eof {
|
|
|
|
return l.lexRvalue
|
|
}
|
|
|
|
if r != ' ' && r != 'T' {
|
|
return l.errorf("incorrect date/time separation character: %c", r)
|
|
}
|
|
|
|
if r == ' ' {
|
|
lookAhead := l.peekString(3)[1:]
|
|
if len(lookAhead) < 2 {
|
|
return l.lexRvalue
|
|
}
|
|
for _, r := range lookAhead {
|
|
if !isDigit(r) {
|
|
return l.lexRvalue
|
|
}
|
|
}
|
|
}
|
|
|
|
l.skip() // skip the T or ' '
|
|
|
|
// time
|
|
|
|
for i := 0; i < 2; i++ {
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("invalid hour digit in time: %c", r)
|
|
}
|
|
}
|
|
|
|
r = l.next()
|
|
if r != ':' {
|
|
return l.errorf("time hour/minute separator should be :, not %c", r)
|
|
}
|
|
|
|
for i := 0; i < 2; i++ {
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("invalid minute digit in time: %c", r)
|
|
}
|
|
}
|
|
|
|
r = l.next()
|
|
if r != ':' {
|
|
return l.errorf("time minute/second separator should be :, not %c", r)
|
|
}
|
|
|
|
for i := 0; i < 2; i++ {
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("invalid second digit in time: %c", r)
|
|
}
|
|
}
|
|
|
|
r = l.peek()
|
|
if r == '.' {
|
|
l.next()
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("expected at least one digit in time's fraction, not %c", r)
|
|
}
|
|
|
|
for {
|
|
r := l.peek()
|
|
if !isDigit(r) {
|
|
break
|
|
}
|
|
l.next()
|
|
}
|
|
}
|
|
|
|
l.emit(tokenLocalTime)
|
|
|
|
return l.lexTimeOffset
|
|
|
|
}
|
|
|
|
func (l *tomlLexer) lexTimeOffset() tomlLexStateFn {
|
|
// potential offset
|
|
|
|
// Z
|
|
// -07:00
|
|
// +07:00
|
|
// nothing
|
|
|
|
r := l.peek()
|
|
|
|
if r == 'Z' {
|
|
l.next()
|
|
l.emit(tokenTimeOffset)
|
|
} else if r == '+' || r == '-' {
|
|
l.next()
|
|
|
|
for i := 0; i < 2; i++ {
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("invalid hour digit in time offset: %c", r)
|
|
}
|
|
}
|
|
|
|
r = l.next()
|
|
if r != ':' {
|
|
return l.errorf("time offset hour/minute separator should be :, not %c", r)
|
|
}
|
|
|
|
for i := 0; i < 2; i++ {
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("invalid minute digit in time offset: %c", r)
|
|
}
|
|
}
|
|
|
|
l.emit(tokenTimeOffset)
|
|
}
|
|
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) lexTime() tomlLexStateFn {
|
|
// v--- cursor
|
|
// 07:32:00
|
|
// 00:32:00.999999
|
|
|
|
for i := 0; i < 2; i++ {
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("invalid minute digit in time: %c", r)
|
|
}
|
|
}
|
|
|
|
r := l.next()
|
|
if r != ':' {
|
|
return l.errorf("time minute/second separator should be :, not %c", r)
|
|
}
|
|
|
|
for i := 0; i < 2; i++ {
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("invalid second digit in time: %c", r)
|
|
}
|
|
}
|
|
|
|
r = l.peek()
|
|
if r == '.' {
|
|
l.next()
|
|
r := l.next()
|
|
if !isDigit(r) {
|
|
return l.errorf("expected at least one digit in time's fraction, not %c", r)
|
|
}
|
|
|
|
for {
|
|
r := l.peek()
|
|
if !isDigit(r) {
|
|
break
|
|
}
|
|
l.next()
|
|
}
|
|
}
|
|
|
|
l.emit(tokenLocalTime)
|
|
return l.lexRvalue
|
|
|
|
}
|
|
|
|
func (l *tomlLexer) lexTrue() tomlLexStateFn {
|
|
l.fastForward(4)
|
|
l.emit(tokenTrue)
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) lexFalse() tomlLexStateFn {
|
|
l.fastForward(5)
|
|
l.emit(tokenFalse)
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) lexInf() tomlLexStateFn {
|
|
l.fastForward(3)
|
|
l.emit(tokenInf)
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) lexNan() tomlLexStateFn {
|
|
l.fastForward(3)
|
|
l.emit(tokenNan)
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) lexEqual() tomlLexStateFn {
|
|
l.next()
|
|
l.emit(tokenEqual)
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) lexComma() tomlLexStateFn {
|
|
l.next()
|
|
l.emit(tokenComma)
|
|
if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '{' {
|
|
return l.lexVoid
|
|
}
|
|
return l.lexRvalue
|
|
}
|
|
|
|
// Parse the key and emits its value without escape sequences.
|
|
// bare keys, basic string keys and literal string keys are supported.
|
|
func (l *tomlLexer) lexKey() tomlLexStateFn {
|
|
var sb strings.Builder
|
|
|
|
for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() {
|
|
if r == '"' {
|
|
l.next()
|
|
str, err := l.lexStringAsString(`"`, false, true)
|
|
if err != nil {
|
|
return l.errorf(err.Error())
|
|
}
|
|
sb.WriteString("\"")
|
|
sb.WriteString(str)
|
|
sb.WriteString("\"")
|
|
l.next()
|
|
continue
|
|
} else if r == '\'' {
|
|
l.next()
|
|
str, err := l.lexLiteralStringAsString(`'`, false)
|
|
if err != nil {
|
|
return l.errorf(err.Error())
|
|
}
|
|
sb.WriteString("'")
|
|
sb.WriteString(str)
|
|
sb.WriteString("'")
|
|
l.next()
|
|
continue
|
|
} else if r == '\n' {
|
|
return l.errorf("keys cannot contain new lines")
|
|
} else if isSpace(r) {
|
|
var str strings.Builder
|
|
str.WriteString(" ")
|
|
|
|
// skip trailing whitespace
|
|
l.next()
|
|
for r = l.peek(); isSpace(r); r = l.peek() {
|
|
str.WriteRune(r)
|
|
l.next()
|
|
}
|
|
// break loop if not a dot
|
|
if r != '.' {
|
|
break
|
|
}
|
|
str.WriteString(".")
|
|
// skip trailing whitespace after dot
|
|
l.next()
|
|
for r = l.peek(); isSpace(r); r = l.peek() {
|
|
str.WriteRune(r)
|
|
l.next()
|
|
}
|
|
sb.WriteString(str.String())
|
|
continue
|
|
} else if r == '.' {
|
|
// skip
|
|
} else if !isValidBareChar(r) {
|
|
return l.errorf("keys cannot contain %c character", r)
|
|
}
|
|
sb.WriteRune(r)
|
|
l.next()
|
|
}
|
|
l.emitWithValue(tokenKey, sb.String())
|
|
return l.lexVoid
|
|
}
|
|
|
|
func (l *tomlLexer) lexComment(previousState tomlLexStateFn) tomlLexStateFn {
|
|
return func() tomlLexStateFn {
|
|
for next := l.peek(); next != '\n' && next != eof; next = l.peek() {
|
|
if next == '\r' && l.follow("\r\n") {
|
|
break
|
|
}
|
|
l.next()
|
|
}
|
|
l.ignore()
|
|
return previousState
|
|
}
|
|
}
|
|
|
|
func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
|
|
l.next()
|
|
l.emit(tokenLeftBracket)
|
|
l.brackets = append(l.brackets, '[')
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) {
|
|
var sb strings.Builder
|
|
|
|
if discardLeadingNewLine {
|
|
if l.follow("\r\n") {
|
|
l.skip()
|
|
l.skip()
|
|
} else if l.peek() == '\n' {
|
|
l.skip()
|
|
}
|
|
}
|
|
|
|
// find end of string
|
|
for {
|
|
if l.follow(terminator) {
|
|
return sb.String(), nil
|
|
}
|
|
|
|
next := l.peek()
|
|
if next == eof {
|
|
break
|
|
}
|
|
sb.WriteRune(l.next())
|
|
}
|
|
|
|
return "", errors.New("unclosed string")
|
|
}
|
|
|
|
func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
|
|
l.skip()
|
|
|
|
// handle special case for triple-quote
|
|
terminator := "'"
|
|
discardLeadingNewLine := false
|
|
if l.follow("''") {
|
|
l.skip()
|
|
l.skip()
|
|
terminator = "'''"
|
|
discardLeadingNewLine = true
|
|
}
|
|
|
|
str, err := l.lexLiteralStringAsString(terminator, discardLeadingNewLine)
|
|
if err != nil {
|
|
return l.errorf(err.Error())
|
|
}
|
|
|
|
l.emitWithValue(tokenString, str)
|
|
l.fastForward(len(terminator))
|
|
l.ignore()
|
|
return l.lexRvalue
|
|
}
|
|
|
|
// Lex a string and return the results as a string.
|
|
// Terminator is the substring indicating the end of the token.
|
|
// The resulting string does not include the terminator.
|
|
func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) {
|
|
var sb strings.Builder
|
|
|
|
if discardLeadingNewLine {
|
|
if l.follow("\r\n") {
|
|
l.skip()
|
|
l.skip()
|
|
} else if l.peek() == '\n' {
|
|
l.skip()
|
|
}
|
|
}
|
|
|
|
for {
|
|
if l.follow(terminator) {
|
|
return sb.String(), nil
|
|
}
|
|
|
|
if l.follow("\\") {
|
|
l.next()
|
|
switch l.peek() {
|
|
case '\r':
|
|
fallthrough
|
|
case '\n':
|
|
fallthrough
|
|
case '\t':
|
|
fallthrough
|
|
case ' ':
|
|
// skip all whitespace chars following backslash
|
|
for strings.ContainsRune("\r\n\t ", l.peek()) {
|
|
l.next()
|
|
}
|
|
case '"':
|
|
sb.WriteString("\"")
|
|
l.next()
|
|
case 'n':
|
|
sb.WriteString("\n")
|
|
l.next()
|
|
case 'b':
|
|
sb.WriteString("\b")
|
|
l.next()
|
|
case 'f':
|
|
sb.WriteString("\f")
|
|
l.next()
|
|
case '/':
|
|
sb.WriteString("/")
|
|
l.next()
|
|
case 't':
|
|
sb.WriteString("\t")
|
|
l.next()
|
|
case 'r':
|
|
sb.WriteString("\r")
|
|
l.next()
|
|
case '\\':
|
|
sb.WriteString("\\")
|
|
l.next()
|
|
case 'u':
|
|
l.next()
|
|
var code strings.Builder
|
|
for i := 0; i < 4; i++ {
|
|
c := l.peek()
|
|
if !isHexDigit(c) {
|
|
return "", errors.New("unfinished unicode escape")
|
|
}
|
|
l.next()
|
|
code.WriteRune(c)
|
|
}
|
|
intcode, err := strconv.ParseInt(code.String(), 16, 32)
|
|
if err != nil {
|
|
return "", errors.New("invalid unicode escape: \\u" + code.String())
|
|
}
|
|
sb.WriteRune(rune(intcode))
|
|
case 'U':
|
|
l.next()
|
|
var code strings.Builder
|
|
for i := 0; i < 8; i++ {
|
|
c := l.peek()
|
|
if !isHexDigit(c) {
|
|
return "", errors.New("unfinished unicode escape")
|
|
}
|
|
l.next()
|
|
code.WriteRune(c)
|
|
}
|
|
intcode, err := strconv.ParseInt(code.String(), 16, 64)
|
|
if err != nil {
|
|
return "", errors.New("invalid unicode escape: \\U" + code.String())
|
|
}
|
|
sb.WriteRune(rune(intcode))
|
|
default:
|
|
return "", errors.New("invalid escape sequence: \\" + string(l.peek()))
|
|
}
|
|
} else {
|
|
r := l.peek()
|
|
|
|
if 0x00 <= r && r <= 0x1F && r != '\t' && !(acceptNewLines && (r == '\n' || r == '\r')) {
|
|
return "", fmt.Errorf("unescaped control character %U", r)
|
|
}
|
|
l.next()
|
|
sb.WriteRune(r)
|
|
}
|
|
|
|
if l.peek() == eof {
|
|
break
|
|
}
|
|
}
|
|
|
|
return "", errors.New("unclosed string")
|
|
}
|
|
|
|
func (l *tomlLexer) lexString() tomlLexStateFn {
|
|
l.skip()
|
|
|
|
// handle special case for triple-quote
|
|
terminator := `"`
|
|
discardLeadingNewLine := false
|
|
acceptNewLines := false
|
|
if l.follow(`""`) {
|
|
l.skip()
|
|
l.skip()
|
|
terminator = `"""`
|
|
discardLeadingNewLine = true
|
|
acceptNewLines = true
|
|
}
|
|
|
|
str, err := l.lexStringAsString(terminator, discardLeadingNewLine, acceptNewLines)
|
|
if err != nil {
|
|
return l.errorf(err.Error())
|
|
}
|
|
|
|
l.emitWithValue(tokenString, str)
|
|
l.fastForward(len(terminator))
|
|
l.ignore()
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) lexTableKey() tomlLexStateFn {
|
|
l.next()
|
|
|
|
if l.peek() == '[' {
|
|
// token '[[' signifies an array of tables
|
|
l.next()
|
|
l.emit(tokenDoubleLeftBracket)
|
|
return l.lexInsideTableArrayKey
|
|
}
|
|
// vanilla table key
|
|
l.emit(tokenLeftBracket)
|
|
return l.lexInsideTableKey
|
|
}
|
|
|
|
// Parse the key till "]]", but only bare keys are supported
|
|
func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
|
|
for r := l.peek(); r != eof; r = l.peek() {
|
|
switch r {
|
|
case ']':
|
|
if l.currentTokenStop > l.currentTokenStart {
|
|
l.emit(tokenKeyGroupArray)
|
|
}
|
|
l.next()
|
|
if l.peek() != ']' {
|
|
break
|
|
}
|
|
l.next()
|
|
l.emit(tokenDoubleRightBracket)
|
|
return l.lexVoid
|
|
case '[':
|
|
return l.errorf("table array key cannot contain ']'")
|
|
default:
|
|
l.next()
|
|
}
|
|
}
|
|
return l.errorf("unclosed table array key")
|
|
}
|
|
|
|
// Parse the key till "]" but only bare keys are supported
|
|
func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
|
|
for r := l.peek(); r != eof; r = l.peek() {
|
|
switch r {
|
|
case ']':
|
|
if l.currentTokenStop > l.currentTokenStart {
|
|
l.emit(tokenKeyGroup)
|
|
}
|
|
l.next()
|
|
l.emit(tokenRightBracket)
|
|
return l.lexVoid
|
|
case '[':
|
|
return l.errorf("table key cannot contain ']'")
|
|
default:
|
|
l.next()
|
|
}
|
|
}
|
|
return l.errorf("unclosed table key")
|
|
}
|
|
|
|
func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
|
|
l.next()
|
|
l.emit(tokenRightBracket)
|
|
if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '[' {
|
|
return l.errorf("cannot have ']' here")
|
|
}
|
|
l.brackets = l.brackets[:len(l.brackets)-1]
|
|
return l.lexRvalue
|
|
}
|
|
|
|
// validRuneFn reports whether r may appear in the digits of a number written
// in a given base.
type validRuneFn func(r rune) bool
|
|
|
|
// isValidHexRune reports whether r may appear in a hexadecimal literal: a
// decimal digit, a letter a-f in either case, or the '_' separator.
func isValidHexRune(r rune) bool {
	switch {
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f':
		return true
	case 'A' <= r && r <= 'F':
		return true
	}
	return r == '_'
}
|
|
|
|
// isValidOctalRune reports whether r may appear in an octal literal: a digit
// from 0 to 7 or the '_' separator.
func isValidOctalRune(r rune) bool {
	if r == '_' {
		return true
	}
	return '0' <= r && r <= '7'
}
|
|
|
|
// isValidBinaryRune reports whether r may appear in a binary literal: '0',
// '1' or the '_' separator.
func isValidBinaryRune(r rune) bool {
	switch r {
	case '0', '1', '_':
		return true
	}
	return false
}
|
|
|
|
func (l *tomlLexer) lexNumber() tomlLexStateFn {
|
|
r := l.peek()
|
|
|
|
if r == '0' {
|
|
follow := l.peekString(2)
|
|
if len(follow) == 2 {
|
|
var isValidRune validRuneFn
|
|
switch follow[1] {
|
|
case 'x':
|
|
isValidRune = isValidHexRune
|
|
case 'o':
|
|
isValidRune = isValidOctalRune
|
|
case 'b':
|
|
isValidRune = isValidBinaryRune
|
|
default:
|
|
if follow[1] >= 'a' && follow[1] <= 'z' || follow[1] >= 'A' && follow[1] <= 'Z' {
|
|
return l.errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(follow[1]))
|
|
}
|
|
}
|
|
|
|
if isValidRune != nil {
|
|
l.next()
|
|
l.next()
|
|
digitSeen := false
|
|
for {
|
|
next := l.peek()
|
|
if !isValidRune(next) {
|
|
break
|
|
}
|
|
digitSeen = true
|
|
l.next()
|
|
}
|
|
|
|
if !digitSeen {
|
|
return l.errorf("number needs at least one digit")
|
|
}
|
|
|
|
l.emit(tokenInteger)
|
|
|
|
return l.lexRvalue
|
|
}
|
|
}
|
|
}
|
|
|
|
if r == '+' || r == '-' {
|
|
l.next()
|
|
if l.follow("inf") {
|
|
return l.lexInf
|
|
}
|
|
if l.follow("nan") {
|
|
return l.lexNan
|
|
}
|
|
}
|
|
|
|
pointSeen := false
|
|
expSeen := false
|
|
digitSeen := false
|
|
for {
|
|
next := l.peek()
|
|
if next == '.' {
|
|
if pointSeen {
|
|
return l.errorf("cannot have two dots in one float")
|
|
}
|
|
l.next()
|
|
if !isDigit(l.peek()) {
|
|
return l.errorf("float cannot end with a dot")
|
|
}
|
|
pointSeen = true
|
|
} else if next == 'e' || next == 'E' {
|
|
expSeen = true
|
|
l.next()
|
|
r := l.peek()
|
|
if r == '+' || r == '-' {
|
|
l.next()
|
|
}
|
|
} else if isDigit(next) {
|
|
digitSeen = true
|
|
l.next()
|
|
} else if next == '_' {
|
|
l.next()
|
|
} else {
|
|
break
|
|
}
|
|
if pointSeen && !digitSeen {
|
|
return l.errorf("cannot start float with a dot")
|
|
}
|
|
}
|
|
|
|
if !digitSeen {
|
|
return l.errorf("no digit in that number")
|
|
}
|
|
if pointSeen || expSeen {
|
|
l.emit(tokenFloat)
|
|
} else {
|
|
l.emit(tokenInteger)
|
|
}
|
|
return l.lexRvalue
|
|
}
|
|
|
|
func (l *tomlLexer) run() {
|
|
for state := l.lexVoid; state != nil; {
|
|
state = state()
|
|
}
|
|
}
|
|
|
|
// Entry point
|
|
func lexToml(inputBytes []byte) []token {
|
|
runes := bytes.Runes(inputBytes)
|
|
l := &tomlLexer{
|
|
input: runes,
|
|
tokens: make([]token, 0, 256),
|
|
line: 1,
|
|
col: 1,
|
|
endbufferLine: 1,
|
|
endbufferCol: 1,
|
|
}
|
|
l.run()
|
|
return l.tokens
|
|
}
|