aboutsummaryrefslogtreecommitdiffstats
path: root/src/dma/vendor/github.com/BurntSushi/toml/lex.go
diff options
context:
space:
mode:
authorToshiaki Takahashi <takahashi.tsc@ncos.nec.co.jp>2018-09-06 09:04:29 +0000
committerToshiaki Takahashi <takahashi.tsc@ncos.nec.co.jp>2018-09-07 06:03:01 +0000
commitd61931341176dad9ccff7c967a10d88fe54218fa (patch)
tree526457882d4abe0c38d2242d6daa311bf8ef51cf /src/dma/vendor/github.com/BurntSushi/toml/lex.go
parent73abc060f31a6bf866fa1dad0a1a6efdfd94d775 (diff)
src: Add DMA localagent
Change-Id: Ibcee814fbc9a904448eeb368a1a26bbb69cf54aa Signed-off-by: Toshiaki Takahashi <takahashi.tsc@ncos.nec.co.jp>
Diffstat (limited to 'src/dma/vendor/github.com/BurntSushi/toml/lex.go')
-rw-r--r--src/dma/vendor/github.com/BurntSushi/toml/lex.go953
1 files changed, 953 insertions, 0 deletions
diff --git a/src/dma/vendor/github.com/BurntSushi/toml/lex.go b/src/dma/vendor/github.com/BurntSushi/toml/lex.go
new file mode 100644
index 00000000..6dee7fc7
--- /dev/null
+++ b/src/dma/vendor/github.com/BurntSushi/toml/lex.go
@@ -0,0 +1,953 @@
+package toml
+
+import (
+ "fmt"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+type itemType int
+
+const (
+ itemError itemType = iota
+ itemNIL // used in the parser to indicate no type
+ itemEOF
+ itemText
+ itemString
+ itemRawString
+ itemMultilineString
+ itemRawMultilineString
+ itemBool
+ itemInteger
+ itemFloat
+ itemDatetime
+ itemArray // the start of an array
+ itemArrayEnd
+ itemTableStart
+ itemTableEnd
+ itemArrayTableStart
+ itemArrayTableEnd
+ itemKeyStart
+ itemCommentStart
+ itemInlineTableStart
+ itemInlineTableEnd
+)
+
+const (
+ eof = 0
+ comma = ','
+ tableStart = '['
+ tableEnd = ']'
+ arrayTableStart = '['
+ arrayTableEnd = ']'
+ tableSep = '.'
+ keySep = '='
+ arrayStart = '['
+ arrayEnd = ']'
+ commentStart = '#'
+ stringStart = '"'
+ stringEnd = '"'
+ rawStringStart = '\''
+ rawStringEnd = '\''
+ inlineTableStart = '{'
+ inlineTableEnd = '}'
+)
+
+type stateFn func(lx *lexer) stateFn
+
+type lexer struct {
+ input string
+ start int
+ pos int
+ line int
+ state stateFn
+ items chan item
+
+ // Allow for backing up up to three runes.
+ // This is necessary because TOML contains 3-rune tokens (""" and ''').
+ prevWidths [3]int
+ nprev int // how many of prevWidths are in use
+ // If we emit an eof, we can still back up, but it is not OK to call
+ // next again.
+ atEOF bool
+
+ // A stack of state functions used to maintain context.
+ // The idea is to reuse parts of the state machine in various places.
+ // For example, values can appear at the top level or within arbitrarily
+ // nested arrays. The last state on the stack is used after a value has
+ // been lexed. Similarly for comments.
+ stack []stateFn
+}
+
+type item struct {
+ typ itemType
+ val string
+ line int
+}
+
+func (lx *lexer) nextItem() item {
+ for {
+ select {
+ case item := <-lx.items:
+ return item
+ default:
+ lx.state = lx.state(lx)
+ }
+ }
+}
+
+func lex(input string) *lexer {
+ lx := &lexer{
+ input: input,
+ state: lexTop,
+ line: 1,
+ items: make(chan item, 10),
+ stack: make([]stateFn, 0, 10),
+ }
+ return lx
+}
+
+func (lx *lexer) push(state stateFn) {
+ lx.stack = append(lx.stack, state)
+}
+
+func (lx *lexer) pop() stateFn {
+ if len(lx.stack) == 0 {
+ return lx.errorf("BUG in lexer: no states to pop")
+ }
+ last := lx.stack[len(lx.stack)-1]
+ lx.stack = lx.stack[0 : len(lx.stack)-1]
+ return last
+}
+
+func (lx *lexer) current() string {
+ return lx.input[lx.start:lx.pos]
+}
+
+func (lx *lexer) emit(typ itemType) {
+ lx.items <- item{typ, lx.current(), lx.line}
+ lx.start = lx.pos
+}
+
+func (lx *lexer) emitTrim(typ itemType) {
+ lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
+ lx.start = lx.pos
+}
+
+func (lx *lexer) next() (r rune) {
+ if lx.atEOF {
+ panic("next called after EOF")
+ }
+ if lx.pos >= len(lx.input) {
+ lx.atEOF = true
+ return eof
+ }
+
+ if lx.input[lx.pos] == '\n' {
+ lx.line++
+ }
+ lx.prevWidths[2] = lx.prevWidths[1]
+ lx.prevWidths[1] = lx.prevWidths[0]
+ if lx.nprev < 3 {
+ lx.nprev++
+ }
+ r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
+ lx.prevWidths[0] = w
+ lx.pos += w
+ return r
+}
+
+// ignore skips over the pending input before this point.
+func (lx *lexer) ignore() {
+ lx.start = lx.pos
+}
+
+// backup steps back one rune. Can be called only twice between calls to next.
+func (lx *lexer) backup() {
+ if lx.atEOF {
+ lx.atEOF = false
+ return
+ }
+ if lx.nprev < 1 {
+ panic("backed up too far")
+ }
+ w := lx.prevWidths[0]
+ lx.prevWidths[0] = lx.prevWidths[1]
+ lx.prevWidths[1] = lx.prevWidths[2]
+ lx.nprev--
+ lx.pos -= w
+ if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
+ lx.line--
+ }
+}
+
+// accept consumes the next rune if it's equal to `valid`.
+func (lx *lexer) accept(valid rune) bool {
+ if lx.next() == valid {
+ return true
+ }
+ lx.backup()
+ return false
+}
+
+// peek returns but does not consume the next rune in the input.
+func (lx *lexer) peek() rune {
+ r := lx.next()
+ lx.backup()
+ return r
+}
+
+// skip ignores all input that matches the given predicate.
+func (lx *lexer) skip(pred func(rune) bool) {
+ for {
+ r := lx.next()
+ if pred(r) {
+ continue
+ }
+ lx.backup()
+ lx.ignore()
+ return
+ }
+}
+
+// errorf stops all lexing by emitting an error and returning `nil`.
+// Note that any value that is a character is escaped if it's a special
+// character (newlines, tabs, etc.).
+func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
+ lx.items <- item{
+ itemError,
+ fmt.Sprintf(format, values...),
+ lx.line,
+ }
+ return nil
+}
+
+// lexTop consumes elements at the top level of TOML data.
+func lexTop(lx *lexer) stateFn {
+ r := lx.next()
+ if isWhitespace(r) || isNL(r) {
+ return lexSkip(lx, lexTop)
+ }
+ switch r {
+ case commentStart:
+ lx.push(lexTop)
+ return lexCommentStart
+ case tableStart:
+ return lexTableStart
+ case eof:
+ if lx.pos > lx.start {
+ return lx.errorf("unexpected EOF")
+ }
+ lx.emit(itemEOF)
+ return nil
+ }
+
+ // At this point, the only valid item can be a key, so we back up
+ // and let the key lexer do the rest.
+ lx.backup()
+ lx.push(lexTopEnd)
+ return lexKeyStart
+}
+
+// lexTopEnd is entered whenever a top-level item has been consumed. (A value
+// or a table.) It must see only whitespace, and will turn back to lexTop
+// upon a newline. If it sees EOF, it will quit the lexer successfully.
+func lexTopEnd(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case r == commentStart:
+ // a comment will read to a newline for us.
+ lx.push(lexTop)
+ return lexCommentStart
+ case isWhitespace(r):
+ return lexTopEnd
+ case isNL(r):
+ lx.ignore()
+ return lexTop
+ case r == eof:
+ lx.emit(itemEOF)
+ return nil
+ }
+ return lx.errorf("expected a top-level item to end with a newline, "+
+ "comment, or EOF, but got %q instead", r)
+}
+
+// lexTable lexes the beginning of a table. Namely, it makes sure that
+// it starts with a character other than '.' and ']'.
+// It assumes that '[' has already been consumed.
+// It also handles the case that this is an item in an array of tables.
+// e.g., '[[name]]'.
+func lexTableStart(lx *lexer) stateFn {
+ if lx.peek() == arrayTableStart {
+ lx.next()
+ lx.emit(itemArrayTableStart)
+ lx.push(lexArrayTableEnd)
+ } else {
+ lx.emit(itemTableStart)
+ lx.push(lexTableEnd)
+ }
+ return lexTableNameStart
+}
+
+func lexTableEnd(lx *lexer) stateFn {
+ lx.emit(itemTableEnd)
+ return lexTopEnd
+}
+
+func lexArrayTableEnd(lx *lexer) stateFn {
+ if r := lx.next(); r != arrayTableEnd {
+ return lx.errorf("expected end of table array name delimiter %q, "+
+ "but got %q instead", arrayTableEnd, r)
+ }
+ lx.emit(itemArrayTableEnd)
+ return lexTopEnd
+}
+
+func lexTableNameStart(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
+ switch r := lx.peek(); {
+ case r == tableEnd || r == eof:
+ return lx.errorf("unexpected end of table name " +
+ "(table names cannot be empty)")
+ case r == tableSep:
+ return lx.errorf("unexpected table separator " +
+ "(table names cannot be empty)")
+ case r == stringStart || r == rawStringStart:
+ lx.ignore()
+ lx.push(lexTableNameEnd)
+ return lexValue // reuse string lexing
+ default:
+ return lexBareTableName
+ }
+}
+
+// lexBareTableName lexes the name of a table. It assumes that at least one
+// valid character for the table has already been read.
+func lexBareTableName(lx *lexer) stateFn {
+ r := lx.next()
+ if isBareKeyChar(r) {
+ return lexBareTableName
+ }
+ lx.backup()
+ lx.emit(itemText)
+ return lexTableNameEnd
+}
+
+// lexTableNameEnd reads the end of a piece of a table name, optionally
+// consuming whitespace.
+func lexTableNameEnd(lx *lexer) stateFn {
+ lx.skip(isWhitespace)
+ switch r := lx.next(); {
+ case isWhitespace(r):
+ return lexTableNameEnd
+ case r == tableSep:
+ lx.ignore()
+ return lexTableNameStart
+ case r == tableEnd:
+ return lx.pop()
+ default:
+ return lx.errorf("expected '.' or ']' to end table name, "+
+ "but got %q instead", r)
+ }
+}
+
+// lexKeyStart consumes a key name up until the first non-whitespace character.
+// lexKeyStart will ignore whitespace.
+func lexKeyStart(lx *lexer) stateFn {
+ r := lx.peek()
+ switch {
+ case r == keySep:
+ return lx.errorf("unexpected key separator %q", keySep)
+ case isWhitespace(r) || isNL(r):
+ lx.next()
+ return lexSkip(lx, lexKeyStart)
+ case r == stringStart || r == rawStringStart:
+ lx.ignore()
+ lx.emit(itemKeyStart)
+ lx.push(lexKeyEnd)
+ return lexValue // reuse string lexing
+ default:
+ lx.ignore()
+ lx.emit(itemKeyStart)
+ return lexBareKey
+ }
+}
+
+// lexBareKey consumes the text of a bare key. Assumes that the first character
+// (which is not whitespace) has not yet been consumed.
+func lexBareKey(lx *lexer) stateFn {
+ switch r := lx.next(); {
+ case isBareKeyChar(r):
+ return lexBareKey
+ case isWhitespace(r):
+ lx.backup()
+ lx.emit(itemText)
+ return lexKeyEnd
+ case r == keySep:
+ lx.backup()
+ lx.emit(itemText)
+ return lexKeyEnd
+ default:
+ return lx.errorf("bare keys cannot contain %q", r)
+ }
+}
+
+// lexKeyEnd consumes the end of a key and trims whitespace (up to the key
+// separator).
+func lexKeyEnd(lx *lexer) stateFn {
+ switch r := lx.next(); {
+ case r == keySep:
+ return lexSkip(lx, lexValue)
+ case isWhitespace(r):
+ return lexSkip(lx, lexKeyEnd)
+ default:
+ return lx.errorf("expected key separator %q, but got %q instead",
+ keySep, r)
+ }
+}
+
+// lexValue starts the consumption of a value anywhere a value is expected.
+// lexValue will ignore whitespace.
+// After a value is lexed, the last state on the next is popped and returned.
+func lexValue(lx *lexer) stateFn {
+ // We allow whitespace to precede a value, but NOT newlines.
+ // In array syntax, the array states are responsible for ignoring newlines.
+ r := lx.next()
+ switch {
+ case isWhitespace(r):
+ return lexSkip(lx, lexValue)
+ case isDigit(r):
+ lx.backup() // avoid an extra state and use the same as above
+ return lexNumberOrDateStart
+ }
+ switch r {
+ case arrayStart:
+ lx.ignore()
+ lx.emit(itemArray)
+ return lexArrayValue
+ case inlineTableStart:
+ lx.ignore()
+ lx.emit(itemInlineTableStart)
+ return lexInlineTableValue
+ case stringStart:
+ if lx.accept(stringStart) {
+ if lx.accept(stringStart) {
+ lx.ignore() // Ignore """
+ return lexMultilineString
+ }
+ lx.backup()
+ }
+ lx.ignore() // ignore the '"'
+ return lexString
+ case rawStringStart:
+ if lx.accept(rawStringStart) {
+ if lx.accept(rawStringStart) {
+ lx.ignore() // Ignore """
+ return lexMultilineRawString
+ }
+ lx.backup()
+ }
+ lx.ignore() // ignore the "'"
+ return lexRawString
+ case '+', '-':
+ return lexNumberStart
+ case '.': // special error case, be kind to users
+ return lx.errorf("floats must start with a digit, not '.'")
+ }
+ if unicode.IsLetter(r) {
+ // Be permissive here; lexBool will give a nice error if the
+ // user wrote something like
+ // x = foo
+ // (i.e. not 'true' or 'false' but is something else word-like.)
+ lx.backup()
+ return lexBool
+ }
+ return lx.errorf("expected value but found %q instead", r)
+}
+
+// lexArrayValue consumes one value in an array. It assumes that '[' or ','
+// have already been consumed. All whitespace and newlines are ignored.
+func lexArrayValue(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case isWhitespace(r) || isNL(r):
+ return lexSkip(lx, lexArrayValue)
+ case r == commentStart:
+ lx.push(lexArrayValue)
+ return lexCommentStart
+ case r == comma:
+ return lx.errorf("unexpected comma")
+ case r == arrayEnd:
+ // NOTE(caleb): The spec isn't clear about whether you can have
+ // a trailing comma or not, so we'll allow it.
+ return lexArrayEnd
+ }
+
+ lx.backup()
+ lx.push(lexArrayValueEnd)
+ return lexValue
+}
+
+// lexArrayValueEnd consumes everything between the end of an array value and
+// the next value (or the end of the array): it ignores whitespace and newlines
+// and expects either a ',' or a ']'.
+func lexArrayValueEnd(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case isWhitespace(r) || isNL(r):
+ return lexSkip(lx, lexArrayValueEnd)
+ case r == commentStart:
+ lx.push(lexArrayValueEnd)
+ return lexCommentStart
+ case r == comma:
+ lx.ignore()
+ return lexArrayValue // move on to the next value
+ case r == arrayEnd:
+ return lexArrayEnd
+ }
+ return lx.errorf(
+ "expected a comma or array terminator %q, but got %q instead",
+ arrayEnd, r,
+ )
+}
+
+// lexArrayEnd finishes the lexing of an array.
+// It assumes that a ']' has just been consumed.
+func lexArrayEnd(lx *lexer) stateFn {
+ lx.ignore()
+ lx.emit(itemArrayEnd)
+ return lx.pop()
+}
+
+// lexInlineTableValue consumes one key/value pair in an inline table.
+// It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
+func lexInlineTableValue(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case isWhitespace(r):
+ return lexSkip(lx, lexInlineTableValue)
+ case isNL(r):
+ return lx.errorf("newlines not allowed within inline tables")
+ case r == commentStart:
+ lx.push(lexInlineTableValue)
+ return lexCommentStart
+ case r == comma:
+ return lx.errorf("unexpected comma")
+ case r == inlineTableEnd:
+ return lexInlineTableEnd
+ }
+ lx.backup()
+ lx.push(lexInlineTableValueEnd)
+ return lexKeyStart
+}
+
+// lexInlineTableValueEnd consumes everything between the end of an inline table
+// key/value pair and the next pair (or the end of the table):
+// it ignores whitespace and expects either a ',' or a '}'.
+func lexInlineTableValueEnd(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case isWhitespace(r):
+ return lexSkip(lx, lexInlineTableValueEnd)
+ case isNL(r):
+ return lx.errorf("newlines not allowed within inline tables")
+ case r == commentStart:
+ lx.push(lexInlineTableValueEnd)
+ return lexCommentStart
+ case r == comma:
+ lx.ignore()
+ return lexInlineTableValue
+ case r == inlineTableEnd:
+ return lexInlineTableEnd
+ }
+ return lx.errorf("expected a comma or an inline table terminator %q, "+
+ "but got %q instead", inlineTableEnd, r)
+}
+
+// lexInlineTableEnd finishes the lexing of an inline table.
+// It assumes that a '}' has just been consumed.
+func lexInlineTableEnd(lx *lexer) stateFn {
+ lx.ignore()
+ lx.emit(itemInlineTableEnd)
+ return lx.pop()
+}
+
+// lexString consumes the inner contents of a string. It assumes that the
+// beginning '"' has already been consumed and ignored.
+func lexString(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case r == eof:
+ return lx.errorf("unexpected EOF")
+ case isNL(r):
+ return lx.errorf("strings cannot contain newlines")
+ case r == '\\':
+ lx.push(lexString)
+ return lexStringEscape
+ case r == stringEnd:
+ lx.backup()
+ lx.emit(itemString)
+ lx.next()
+ lx.ignore()
+ return lx.pop()
+ }
+ return lexString
+}
+
+// lexMultilineString consumes the inner contents of a string. It assumes that
+// the beginning '"""' has already been consumed and ignored.
+func lexMultilineString(lx *lexer) stateFn {
+ switch lx.next() {
+ case eof:
+ return lx.errorf("unexpected EOF")
+ case '\\':
+ return lexMultilineStringEscape
+ case stringEnd:
+ if lx.accept(stringEnd) {
+ if lx.accept(stringEnd) {
+ lx.backup()
+ lx.backup()
+ lx.backup()
+ lx.emit(itemMultilineString)
+ lx.next()
+ lx.next()
+ lx.next()
+ lx.ignore()
+ return lx.pop()
+ }
+ lx.backup()
+ }
+ }
+ return lexMultilineString
+}
+
+// lexRawString consumes a raw string. Nothing can be escaped in such a string.
+// It assumes that the beginning "'" has already been consumed and ignored.
+func lexRawString(lx *lexer) stateFn {
+ r := lx.next()
+ switch {
+ case r == eof:
+ return lx.errorf("unexpected EOF")
+ case isNL(r):
+ return lx.errorf("strings cannot contain newlines")
+ case r == rawStringEnd:
+ lx.backup()
+ lx.emit(itemRawString)
+ lx.next()
+ lx.ignore()
+ return lx.pop()
+ }
+ return lexRawString
+}
+
+// lexMultilineRawString consumes a raw string. Nothing can be escaped in such
+// a string. It assumes that the beginning "'''" has already been consumed and
+// ignored.
+func lexMultilineRawString(lx *lexer) stateFn {
+ switch lx.next() {
+ case eof:
+ return lx.errorf("unexpected EOF")
+ case rawStringEnd:
+ if lx.accept(rawStringEnd) {
+ if lx.accept(rawStringEnd) {
+ lx.backup()
+ lx.backup()
+ lx.backup()
+ lx.emit(itemRawMultilineString)
+ lx.next()
+ lx.next()
+ lx.next()
+ lx.ignore()
+ return lx.pop()
+ }
+ lx.backup()
+ }
+ }
+ return lexMultilineRawString
+}
+
+// lexMultilineStringEscape consumes an escaped character. It assumes that the
+// preceding '\\' has already been consumed.
+func lexMultilineStringEscape(lx *lexer) stateFn {
+ // Handle the special case first:
+ if isNL(lx.next()) {
+ return lexMultilineString
+ }
+ lx.backup()
+ lx.push(lexMultilineString)
+ return lexStringEscape(lx)
+}
+
+func lexStringEscape(lx *lexer) stateFn {
+ r := lx.next()
+ switch r {
+ case 'b':
+ fallthrough
+ case 't':
+ fallthrough
+ case 'n':
+ fallthrough
+ case 'f':
+ fallthrough
+ case 'r':
+ fallthrough
+ case '"':
+ fallthrough
+ case '\\':
+ return lx.pop()
+ case 'u':
+ return lexShortUnicodeEscape
+ case 'U':
+ return lexLongUnicodeEscape
+ }
+ return lx.errorf("invalid escape character %q; only the following "+
+ "escape characters are allowed: "+
+ `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
+}
+
+func lexShortUnicodeEscape(lx *lexer) stateFn {
+ var r rune
+ for i := 0; i < 4; i++ {
+ r = lx.next()
+ if !isHexadecimal(r) {
+ return lx.errorf(`expected four hexadecimal digits after '\u', `+
+ "but got %q instead", lx.current())
+ }
+ }
+ return lx.pop()
+}
+
+func lexLongUnicodeEscape(lx *lexer) stateFn {
+ var r rune
+ for i := 0; i < 8; i++ {
+ r = lx.next()
+ if !isHexadecimal(r) {
+ return lx.errorf(`expected eight hexadecimal digits after '\U', `+
+ "but got %q instead", lx.current())
+ }
+ }
+ return lx.pop()
+}
+
+// lexNumberOrDateStart consumes either an integer, a float, or datetime.
+func lexNumberOrDateStart(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexNumberOrDate
+ }
+ switch r {
+ case '_':
+ return lexNumber
+ case 'e', 'E':
+ return lexFloat
+ case '.':
+ return lx.errorf("floats must start with a digit, not '.'")
+ }
+ return lx.errorf("expected a digit but got %q", r)
+}
+
+// lexNumberOrDate consumes either an integer, float or datetime.
+func lexNumberOrDate(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexNumberOrDate
+ }
+ switch r {
+ case '-':
+ return lexDatetime
+ case '_':
+ return lexNumber
+ case '.', 'e', 'E':
+ return lexFloat
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexDatetime consumes a Datetime, to a first approximation.
+// The parser validates that it matches one of the accepted formats.
+func lexDatetime(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexDatetime
+ }
+ switch r {
+ case '-', 'T', ':', '.', 'Z':
+ return lexDatetime
+ }
+
+ lx.backup()
+ lx.emit(itemDatetime)
+ return lx.pop()
+}
+
+// lexNumberStart consumes either an integer or a float. It assumes that a sign
+// has already been read, but that *no* digits have been consumed.
+// lexNumberStart will move to the appropriate integer or float states.
+func lexNumberStart(lx *lexer) stateFn {
+ // We MUST see a digit. Even floats have to start with a digit.
+ r := lx.next()
+ if !isDigit(r) {
+ if r == '.' {
+ return lx.errorf("floats must start with a digit, not '.'")
+ }
+ return lx.errorf("expected a digit but got %q", r)
+ }
+ return lexNumber
+}
+
+// lexNumber consumes an integer or a float after seeing the first digit.
+func lexNumber(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexNumber
+ }
+ switch r {
+ case '_':
+ return lexNumber
+ case '.', 'e', 'E':
+ return lexFloat
+ }
+
+ lx.backup()
+ lx.emit(itemInteger)
+ return lx.pop()
+}
+
+// lexFloat consumes the elements of a float. It allows any sequence of
+// float-like characters, so floats emitted by the lexer are only a first
+// approximation and must be validated by the parser.
+func lexFloat(lx *lexer) stateFn {
+ r := lx.next()
+ if isDigit(r) {
+ return lexFloat
+ }
+ switch r {
+ case '_', '.', '-', '+', 'e', 'E':
+ return lexFloat
+ }
+
+ lx.backup()
+ lx.emit(itemFloat)
+ return lx.pop()
+}
+
+// lexBool consumes a bool string: 'true' or 'false.
+func lexBool(lx *lexer) stateFn {
+ var rs []rune
+ for {
+ r := lx.next()
+ if !unicode.IsLetter(r) {
+ lx.backup()
+ break
+ }
+ rs = append(rs, r)
+ }
+ s := string(rs)
+ switch s {
+ case "true", "false":
+ lx.emit(itemBool)
+ return lx.pop()
+ }
+ return lx.errorf("expected value but found %q instead", s)
+}
+
+// lexCommentStart begins the lexing of a comment. It will emit
+// itemCommentStart and consume no characters, passing control to lexComment.
+func lexCommentStart(lx *lexer) stateFn {
+ lx.ignore()
+ lx.emit(itemCommentStart)
+ return lexComment
+}
+
+// lexComment lexes an entire comment. It assumes that '#' has been consumed.
+// It will consume *up to* the first newline character, and pass control
+// back to the last state on the stack.
+func lexComment(lx *lexer) stateFn {
+ r := lx.peek()
+ if isNL(r) || r == eof {
+ lx.emit(itemText)
+ return lx.pop()
+ }
+ lx.next()
+ return lexComment
+}
+
+// lexSkip ignores all slurped input and moves on to the next state.
+func lexSkip(lx *lexer, nextState stateFn) stateFn {
+ return func(lx *lexer) stateFn {
+ lx.ignore()
+ return nextState
+ }
+}
+
+// isWhitespace returns true if `r` is a whitespace character according
+// to the spec.
+func isWhitespace(r rune) bool {
+ return r == '\t' || r == ' '
+}
+
+func isNL(r rune) bool {
+ return r == '\n' || r == '\r'
+}
+
+func isDigit(r rune) bool {
+ return r >= '0' && r <= '9'
+}
+
+func isHexadecimal(r rune) bool {
+ return (r >= '0' && r <= '9') ||
+ (r >= 'a' && r <= 'f') ||
+ (r >= 'A' && r <= 'F')
+}
+
+func isBareKeyChar(r rune) bool {
+ return (r >= 'A' && r <= 'Z') ||
+ (r >= 'a' && r <= 'z') ||
+ (r >= '0' && r <= '9') ||
+ r == '_' ||
+ r == '-'
+}
+
+func (itype itemType) String() string {
+ switch itype {
+ case itemError:
+ return "Error"
+ case itemNIL:
+ return "NIL"
+ case itemEOF:
+ return "EOF"
+ case itemText:
+ return "Text"
+ case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
+ return "String"
+ case itemBool:
+ return "Bool"
+ case itemInteger:
+ return "Integer"
+ case itemFloat:
+ return "Float"
+ case itemDatetime:
+ return "DateTime"
+ case itemTableStart:
+ return "TableStart"
+ case itemTableEnd:
+ return "TableEnd"
+ case itemKeyStart:
+ return "KeyStart"
+ case itemArray:
+ return "Array"
+ case itemArrayEnd:
+ return "ArrayEnd"
+ case itemCommentStart:
+ return "CommentStart"
+ }
+ panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
+}
+
+func (item item) String() string {
+ return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
+}