summary refs log tree commit diff stats
path: root/vendor/github.com/hashicorp/hcl/json/scanner
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/hashicorp/hcl/json/scanner')
-rw-r--r-- vendor/github.com/hashicorp/hcl/json/scanner/scanner.go 451
-rw-r--r-- vendor/github.com/hashicorp/hcl/json/scanner/scanner_test.go 362
2 files changed, 813 insertions, 0 deletions
diff --git a/vendor/github.com/hashicorp/hcl/json/scanner/scanner.go b/vendor/github.com/hashicorp/hcl/json/scanner/scanner.go
new file mode 100644
index 000000000..dd5c72bb3
--- /dev/null
+++ b/vendor/github.com/hashicorp/hcl/json/scanner/scanner.go
@@ -0,0 +1,451 @@
+package scanner
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "unicode"
+ "unicode/utf8"
+
+ "github.com/hashicorp/hcl/json/token"
+)
+
+// eof is the sentinel rune that next and peek return when no further rune can be read.
+const eof = rune(0)
+
+// Scanner is a lexical scanner that turns source bytes into tokens via Scan.
+type Scanner struct {
+ buf *bytes.Buffer // Source buffer for advancing and scanning
+ src []byte // Source buffer for immutable access
+
+ // Source Position
+ srcPos token.Pos // current position
+ prevPos token.Pos // position before the last next(); restored by unread()
+
+ lastCharLen int // length of last character in bytes
+ lastLineLen int // length of last line in characters (for correct column reporting)
+
+ tokStart int // token text start position
+ tokEnd int // token text end position
+
+ // Error is called for each error encountered. If no Error
+ // function is set, the error is reported to os.Stderr.
+ Error func(pos token.Pos, msg string)
+
+ // ErrorCount is incremented by one for each error encountered.
+ ErrorCount int
+
+ // tokPos is the start position of most recently scanned token; set by
+ // Scan. The Filename field is always left untouched by the Scanner. If
+ // an error is reported (via Error) and Position is invalid, the scanner is
+ // not inside a token.
+ tokPos token.Pos
+}
+
+// New returns a Scanner that reads src. The slice is retained rather than
+// copied: runes are read from a buffer over it and token text is sliced from it.
+func New(src []byte) *Scanner {
+ // even though we accept a src, we read from an io.Reader compatible type
+ // (*bytes.Buffer). So in the future we might easily change it to streaming
+ // read.
+ b := bytes.NewBuffer(src)
+ s := &Scanner{
+ buf: b,
+ src: src,
+ }
+
+ // line numbers are 1-based; Column and Offset start at their zero values
+ s.srcPos.Line = 1
+ return s
+}
+
+// next reads the next rune from the buffered reader and advances srcPos. It
+// returns eof (rune(0)) if the read fails, which includes reaching io.EOF.
+func (s *Scanner) next() rune {
+ ch, size, err := s.buf.ReadRune()
+ if err != nil {
+ // advance for error reporting
+ s.srcPos.Column++
+ s.srcPos.Offset += size
+ s.lastCharLen = size
+ return eof
+ }
+
+ if ch == utf8.RuneError && size == 1 { // invalid UTF-8 byte; note prevPos is not updated on this path
+ s.srcPos.Column++
+ s.srcPos.Offset += size
+ s.lastCharLen = size
+ s.err("illegal UTF-8 encoding")
+ return ch
+ }
+
+ // remember the position before this rune so unread() can restore it
+ s.prevPos = s.srcPos
+
+ s.srcPos.Column++
+ s.lastCharLen = size
+ s.srcPos.Offset += size
+
+ if ch == '\n' {
+ s.srcPos.Line++
+ s.lastLineLen = s.srcPos.Column
+ s.srcPos.Column = 0
+ }
+
+ // debug
+ // fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
+ return ch
+}
+
+// unread puts back the rune read by the last next() and restores srcPos to prevPos; it panics if the buffer cannot unread.
+func (s *Scanner) unread() {
+ if err := s.buf.UnreadRune(); err != nil {
+ panic(err) // this is user fault, we should catch it
+ }
+ s.srcPos = s.prevPos // put back last position
+}
+
+// peek returns the next rune without consuming it or touching srcPos; it returns eof if no rune can be read.
+func (s *Scanner) peek() rune {
+ peek, _, err := s.buf.ReadRune()
+ if err != nil {
+ return eof
+ }
+
+ s.buf.UnreadRune() // error ignored: the ReadRune just above succeeded
+ return peek
+}
+
+// Scan skips whitespace, scans the next token and returns it with its type, start position and literal text.
+func (s *Scanner) Scan() token.Token {
+ ch := s.next()
+
+ // skip white space
+ for isWhitespace(ch) {
+ ch = s.next()
+ }
+
+ var tok token.Type
+
+ // token text markings
+ s.tokStart = s.srcPos.Offset - s.lastCharLen
+
+ // token position: the initial next() already advanced the offset by the
+ // size of the rune, so step back by lastCharLen to get the starting point
+ s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
+ if s.srcPos.Column > 0 {
+ // common case: last character was not a '\n'
+ s.tokPos.Line = s.srcPos.Line
+ s.tokPos.Column = s.srcPos.Column
+ } else {
+ // last character was a '\n'
+ // (we cannot be at the beginning of the source
+ // since we have called next() at least once)
+ s.tokPos.Line = s.srcPos.Line - 1
+ s.tokPos.Column = s.lastLineLen
+ }
+
+ switch {
+ case isLetter(ch):
+ lit := s.scanIdentifier()
+ if lit == "true" || lit == "false" {
+ tok = token.BOOL
+ } else if lit == "null" {
+ tok = token.NULL
+ } else {
+ s.err("illegal char") // tok keeps its zero value
+ }
+ case isDecimal(ch):
+ tok = s.scanNumber(ch)
+ default:
+ switch ch {
+ case eof:
+ tok = token.EOF
+ case '"':
+ tok = token.STRING
+ s.scanString()
+ case '.':
+ tok = token.PERIOD
+ ch = s.peek()
+ if isDecimal(ch) {
+ tok = token.FLOAT
+ ch = s.scanMantissa(ch)
+ ch = s.scanExponent(ch)
+ }
+ case '[':
+ tok = token.LBRACK
+ case ']':
+ tok = token.RBRACK
+ case '{':
+ tok = token.LBRACE
+ case '}':
+ tok = token.RBRACE
+ case ',':
+ tok = token.COMMA
+ case ':':
+ tok = token.COLON
+ case '-':
+ if isDecimal(s.peek()) {
+ ch := s.next()
+ tok = s.scanNumber(ch)
+ } else {
+ s.err("illegal char")
+ }
+ default:
+ s.err("illegal char: " + string(ch))
+ }
+ }
+
+ // the token ends at the current offset
+ s.tokEnd = s.srcPos.Offset
+
+ // create token literal
+ var tokenText string
+ if s.tokStart >= 0 {
+ tokenText = string(s.src[s.tokStart:s.tokEnd])
+ }
+ s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
+
+ return token.Token{
+ Type: tok,
+ Pos: s.tokPos,
+ Text: tokenText,
+ }
+}
+
+// scanNumber scans a number whose first digit ch was already read; it returns token.FLOAT if a fraction or exponent follows, else token.NUMBER.
+func (s *Scanner) scanNumber(ch rune) token.Type {
+ zero := ch == '0'
+ pos := s.srcPos
+
+ s.scanMantissa(ch)
+ ch = s.next() // seek forward
+ if ch == 'e' || ch == 'E' {
+ ch = s.scanExponent(ch)
+ return token.FLOAT
+ }
+
+ if ch == '.' {
+ ch = s.scanFraction(ch)
+ if ch == 'e' || ch == 'E' {
+ ch = s.next()
+ ch = s.scanExponent(ch)
+ }
+ return token.FLOAT
+ }
+
+ if ch != eof {
+ s.unread()
+ }
+
+ // a leading '0' followed by more digits (the position moved past it) is an error
+ if zero && pos != s.srcPos {
+ s.err("numbers cannot start with 0")
+ }
+
+ return token.NUMBER
+}
+
+// scanMantissa scans a run of decimal digits beginning with ch. It returns the first
+// non-decimal rune, which is put back (unless it is eof) so the caller can read it again.
+func (s *Scanner) scanMantissa(ch rune) rune {
+ scanned := false
+ for isDecimal(ch) {
+ ch = s.next()
+ scanned = true
+ }
+
+ if scanned && ch != eof {
+ s.unread()
+ }
+ return ch
+}
+
+// scanFraction scans the digits after an already-consumed '.' and returns the rune at which scanMantissa stopped.
+func (s *Scanner) scanFraction(ch rune) rune {
+ if ch == '.' {
+ ch = s.peek() // we peek just to see if we can move forward
+ ch = s.scanMantissa(ch)
+ }
+ return ch
+}
+
+// scanExponent scans an optional sign and the digits that follow an 'e' or 'E'
+// passed in as ch; any other ch is returned unchanged.
+func (s *Scanner) scanExponent(ch rune) rune {
+ if ch == 'e' || ch == 'E' {
+ ch = s.next()
+ if ch == '-' || ch == '+' {
+ ch = s.next()
+ }
+ ch = s.scanMantissa(ch)
+ }
+ return ch
+}
+
+// scanString scans the rest of a string literal, whose opening quote was already consumed, up to the closing quote.
+func (s *Scanner) scanString() {
+ braces := 0
+ for {
+ // '"' opening already consumed
+ // read character after quote
+ ch := s.next()
+
+ if ch == '\n' || ch < 0 || ch == eof {
+ s.err("literal not terminated")
+ return
+ }
+
+ if ch == '"' {
+ break
+ }
+
+ // Track ${ ... } nesting depth. NOTE(review): braces is never consulted by the quote check above, so a '"' still ends the string inside ${}.
+ if braces == 0 && ch == '$' && s.peek() == '{' {
+ braces++
+ s.next()
+ } else if braces > 0 && ch == '{' {
+ braces++
+ }
+ if braces > 0 && ch == '}' {
+ braces--
+ }
+
+ if ch == '\\' {
+ s.scanEscape()
+ }
+ }
+
+ return
+}
+
+// scanEscape scans the escape sequence that follows an already-consumed backslash and returns the last rune it handled.
+func (s *Scanner) scanEscape() rune {
+ // http://en.cppreference.com/w/cpp/language/escape
+ ch := s.next() // read the character after the backslash
+ switch ch {
+ case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
+ // nothing to do
+ case '0', '1', '2', '3', '4', '5', '6', '7':
+ // octal notation
+ ch = s.scanDigits(ch, 8, 3)
+ case 'x':
+ // hexadecimal notation
+ ch = s.scanDigits(s.next(), 16, 2)
+ case 'u':
+ // universal character name
+ ch = s.scanDigits(s.next(), 16, 4)
+ case 'U':
+ // universal character name
+ ch = s.scanDigits(s.next(), 16, 8)
+ default:
+ s.err("illegal char escape")
+ }
+ return ch
+}
+
+// scanDigits consumes up to n digits of the given base, starting with ch (e.g. scanDigits(ch, 8, 3) for an octal escape).
+// It reports "illegal char escape" if fewer than n digits were found and returns the rune at which scanning stopped.
+func (s *Scanner) scanDigits(ch rune, base, n int) rune {
+ for n > 0 && digitVal(ch) < base {
+ ch = s.next()
+ n--
+ }
+ if n > 0 {
+ s.err("illegal char escape")
+ }
+ if ch != eof { // put the first non-digit rune back; at end of input there is nothing to unread and unread() would panic
+ s.unread()
+ }
+ return ch
+}
+
+// scanIdentifier scans letters, digits and '-' after an already-read first rune and returns the identifier text taken from src.
+func (s *Scanner) scanIdentifier() string {
+ offs := s.srcPos.Offset - s.lastCharLen
+ ch := s.next()
+ for isLetter(ch) || isDigit(ch) || ch == '-' {
+ ch = s.next()
+ }
+
+ if ch != eof {
+ s.unread() // we got identifier, put back latest char
+ }
+
+ return string(s.src[offs:s.srcPos.Offset])
+}
+
+// recentPosition returns the position of the most recently read character
+// (srcPos with its offset stepped back by lastCharLen); err uses it to locate errors.
+func (s *Scanner) recentPosition() (pos token.Pos) {
+ pos.Offset = s.srcPos.Offset - s.lastCharLen
+ switch {
+ case s.srcPos.Column > 0:
+ // common case: last character was not a '\n'
+ pos.Line = s.srcPos.Line
+ pos.Column = s.srcPos.Column
+ case s.lastLineLen > 0:
+ // last character was a '\n'
+ // (we cannot be at the beginning of the source
+ // since we have called next() at least once)
+ pos.Line = s.srcPos.Line - 1
+ pos.Column = s.lastLineLen
+ default:
+ // at the beginning of the source
+ pos.Line = 1
+ pos.Column = 1
+ }
+ return
+}
+
+// err increments ErrorCount and reports msg at the recent position through
+// s.Error; if no Error function is set, it is written to os.Stderr instead.
+func (s *Scanner) err(msg string) {
+ s.ErrorCount++
+ pos := s.recentPosition()
+
+ if s.Error != nil {
+ s.Error(pos, msg)
+ return
+ }
+
+ fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
+}
+
+// isLetter returns true if the given rune is an ASCII letter, '_' or a non-ASCII Unicode letter
+func isLetter(ch rune) bool {
+ return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
+}
+
+// isDigit returns true if the given rune is an ASCII decimal digit or a non-ASCII Unicode digit
+func isDigit(ch rune) bool {
+ return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
+}
+
+// isDecimal returns true if the given rune is an ASCII decimal digit ('0' to '9')
+func isDecimal(ch rune) bool {
+ return '0' <= ch && ch <= '9'
+}
+
+// isHexadecimal returns true if the given rune is an ASCII hexadecimal digit (0-9, a-f or A-F)
+func isHexadecimal(ch rune) bool {
+ return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
+}
+
+// isWhitespace returns true if the rune is a space, tab, newline or carriage return; no other runes count as whitespace
+func isWhitespace(ch rune) bool {
+ return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
+}
+
+// digitVal returns the numeric value of a hexadecimal digit rune (which covers octal and decimal digits too), or 16 for any other rune
+func digitVal(ch rune) int {
+ switch {
+ case '0' <= ch && ch <= '9':
+ return int(ch - '0')
+ case 'a' <= ch && ch <= 'f':
+ return int(ch - 'a' + 10)
+ case 'A' <= ch && ch <= 'F':
+ return int(ch - 'A' + 10)
+ }
+ return 16 // larger than any legal digit val
+}
diff --git a/vendor/github.com/hashicorp/hcl/json/scanner/scanner_test.go b/vendor/github.com/hashicorp/hcl/json/scanner/scanner_test.go
new file mode 100644
index 000000000..3033a5797
--- /dev/null
+++ b/vendor/github.com/hashicorp/hcl/json/scanner/scanner_test.go
@@ -0,0 +1,362 @@
+package scanner
+
+import (
+ "bytes"
+ "fmt"
+ "testing"
+
+ "github.com/hashicorp/hcl/json/token"
+)
+
+var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+
+type tokenPair struct { // one test case: a source text and the token type Scan should report for it
+ tok token.Type // expected token type
+ text string // source text, also the expected token text
+}
+
+var tokenLists = map[string][]tokenPair{ // test cases grouped by list name; names are looked up by the Test* functions and orderedTokenLists
+ "operator": []tokenPair{
+ {token.LBRACK, "["},
+ {token.LBRACE, "{"},
+ {token.COMMA, ","},
+ {token.PERIOD, "."},
+ {token.RBRACK, "]"},
+ {token.RBRACE, "}"},
+ },
+ "bool": []tokenPair{
+ {token.BOOL, "true"},
+ {token.BOOL, "false"},
+ },
+ "string": []tokenPair{
+ {token.STRING, `" "`},
+ {token.STRING, `"a"`},
+ {token.STRING, `"本"`},
+ {token.STRING, `"${file(\"foo\")}"`},
+ {token.STRING, `"\a"`},
+ {token.STRING, `"\b"`},
+ {token.STRING, `"\f"`},
+ {token.STRING, `"\n"`},
+ {token.STRING, `"\r"`},
+ {token.STRING, `"\t"`},
+ {token.STRING, `"\v"`},
+ {token.STRING, `"\""`},
+ {token.STRING, `"\000"`},
+ {token.STRING, `"\777"`},
+ {token.STRING, `"\x00"`},
+ {token.STRING, `"\xff"`},
+ {token.STRING, `"\u0000"`},
+ {token.STRING, `"\ufA16"`},
+ {token.STRING, `"\U00000000"`},
+ {token.STRING, `"\U0000ffAB"`},
+ {token.STRING, `"` + f100 + `"`},
+ },
+ "number": []tokenPair{
+ {token.NUMBER, "0"},
+ {token.NUMBER, "1"},
+ {token.NUMBER, "9"},
+ {token.NUMBER, "42"},
+ {token.NUMBER, "1234567890"},
+ {token.NUMBER, "-0"},
+ {token.NUMBER, "-1"},
+ {token.NUMBER, "-9"},
+ {token.NUMBER, "-42"},
+ {token.NUMBER, "-1234567890"},
+ },
+ "float": []tokenPair{
+ {token.FLOAT, "0."},
+ {token.FLOAT, "1."},
+ {token.FLOAT, "42."},
+ {token.FLOAT, "01234567890."},
+ {token.FLOAT, ".0"},
+ {token.FLOAT, ".1"},
+ {token.FLOAT, ".42"},
+ {token.FLOAT, ".0123456789"},
+ {token.FLOAT, "0.0"},
+ {token.FLOAT, "1.0"},
+ {token.FLOAT, "42.0"},
+ {token.FLOAT, "01234567890.0"},
+ {token.FLOAT, "0e0"},
+ {token.FLOAT, "1e0"},
+ {token.FLOAT, "42e0"},
+ {token.FLOAT, "01234567890e0"},
+ {token.FLOAT, "0E0"},
+ {token.FLOAT, "1E0"},
+ {token.FLOAT, "42E0"},
+ {token.FLOAT, "01234567890E0"},
+ {token.FLOAT, "0e+10"},
+ {token.FLOAT, "1e-10"},
+ {token.FLOAT, "42e+10"},
+ {token.FLOAT, "01234567890e-10"},
+ {token.FLOAT, "0E+10"},
+ {token.FLOAT, "1E-10"},
+ {token.FLOAT, "42E+10"},
+ {token.FLOAT, "01234567890E-10"},
+ {token.FLOAT, "01.8e0"},
+ {token.FLOAT, "1.4e0"},
+ {token.FLOAT, "42.2e0"},
+ {token.FLOAT, "01234567890.12e0"},
+ {token.FLOAT, "0.E0"},
+ {token.FLOAT, "1.12E0"},
+ {token.FLOAT, "42.123E0"},
+ {token.FLOAT, "01234567890.213E0"},
+ {token.FLOAT, "0.2e+10"},
+ {token.FLOAT, "1.2e-10"},
+ {token.FLOAT, "42.54e+10"},
+ {token.FLOAT, "01234567890.98e-10"},
+ {token.FLOAT, "0.1E+10"},
+ {token.FLOAT, "1.1E-10"},
+ {token.FLOAT, "42.1E+10"},
+ {token.FLOAT, "01234567890.1E-10"},
+ {token.FLOAT, "-0.0"},
+ {token.FLOAT, "-1.0"},
+ {token.FLOAT, "-42.0"},
+ {token.FLOAT, "-01234567890.0"},
+ {token.FLOAT, "-0e0"},
+ {token.FLOAT, "-1e0"},
+ {token.FLOAT, "-42e0"},
+ {token.FLOAT, "-01234567890e0"},
+ {token.FLOAT, "-0E0"},
+ {token.FLOAT, "-1E0"},
+ {token.FLOAT, "-42E0"},
+ {token.FLOAT, "-01234567890E0"},
+ {token.FLOAT, "-0e+10"},
+ {token.FLOAT, "-1e-10"},
+ {token.FLOAT, "-42e+10"},
+ {token.FLOAT, "-01234567890e-10"},
+ {token.FLOAT, "-0E+10"},
+ {token.FLOAT, "-1E-10"},
+ {token.FLOAT, "-42E+10"},
+ {token.FLOAT, "-01234567890E-10"},
+ {token.FLOAT, "-01.8e0"},
+ {token.FLOAT, "-1.4e0"},
+ {token.FLOAT, "-42.2e0"},
+ {token.FLOAT, "-01234567890.12e0"},
+ {token.FLOAT, "-0.E0"},
+ {token.FLOAT, "-1.12E0"},
+ {token.FLOAT, "-42.123E0"},
+ {token.FLOAT, "-01234567890.213E0"},
+ {token.FLOAT, "-0.2e+10"},
+ {token.FLOAT, "-1.2e-10"},
+ {token.FLOAT, "-42.54e+10"},
+ {token.FLOAT, "-01234567890.98e-10"},
+ {token.FLOAT, "-0.1E+10"},
+ {token.FLOAT, "-1.1E-10"},
+ {token.FLOAT, "-42.1E+10"},
+ {token.FLOAT, "-01234567890.1E-10"},
+ },
+}
+
+var orderedTokenLists = []string{ // fixed iteration order over tokenLists, used by TestPosition
+ "comment", // NOTE(review): tokenLists has no "comment" entry, so this name contributes no tokens
+ "operator",
+ "bool",
+ "string",
+ "number",
+ "float",
+}
+
+func TestPosition(t *testing.T) {
+ // create artificial source code: every token on its own line, preceded by four tabs
+ buf := new(bytes.Buffer)
+
+ for _, listName := range orderedTokenLists {
+ for _, ident := range tokenLists[listName] {
+ fmt.Fprintf(buf, "\t\t\t\t%s\n", ident.text)
+ }
+ }
+
+ s := New(buf.Bytes())
+
+ pos := token.Pos{"", 4, 1, 5} // expected position of the first token; unkeyed literal, presumably Filename, Offset, Line, Column (confirm against token.Pos)
+ s.Scan()
+ for _, listName := range orderedTokenLists {
+
+ for _, k := range tokenLists[listName] {
+ curPos := s.tokPos
+ // fmt.Printf("[%q] s = %+v:%+v\n", k.text, curPos.Offset, curPos.Column)
+
+ if curPos.Offset != pos.Offset {
+ t.Fatalf("offset = %d, want %d for %q", curPos.Offset, pos.Offset, k.text)
+ }
+ if curPos.Line != pos.Line {
+ t.Fatalf("line = %d, want %d for %q", curPos.Line, pos.Line, k.text)
+ }
+ if curPos.Column != pos.Column {
+ t.Fatalf("column = %d, want %d for %q", curPos.Column, pos.Column, k.text)
+ }
+ pos.Offset += 4 + len(k.text) + 1 // 4 tabs + token bytes + newline
+ pos.Line += countNewlines(k.text) + 1 // each token is on a new line
+
+ s.Error = func(pos token.Pos, msg string) {
+ t.Errorf("error %q for %q", msg, k.text)
+ }
+
+ s.Scan()
+ }
+ }
+ // make sure there were no token-internal errors reported by scanner
+ if s.ErrorCount != 0 {
+ t.Errorf("%d errors", s.ErrorCount)
+ }
+}
+
+func TestComment(t *testing.T) { // NOTE(review): tokenLists has no "comment" entry, so the list is nil and nothing is checked
+ testTokenList(t, tokenLists["comment"])
+}
+
+func TestOperator(t *testing.T) { // scans the "operator" list and checks each token's type and text
+ testTokenList(t, tokenLists["operator"])
+}
+
+func TestBool(t *testing.T) { // scans the "bool" list and checks each token's type and text
+ testTokenList(t, tokenLists["bool"])
+}
+
+func TestIdent(t *testing.T) { // NOTE(review): tokenLists has no "ident" entry, so the list is nil and nothing is checked
+ testTokenList(t, tokenLists["ident"])
+}
+
+func TestString(t *testing.T) { // scans the "string" list and checks each token's type and text
+ testTokenList(t, tokenLists["string"])
+}
+
+func TestNumber(t *testing.T) { // scans the "number" list and checks each token's type and text
+ testTokenList(t, tokenLists["number"])
+}
+
+func TestFloat(t *testing.T) { // scans the "float" list and checks each token's type and text
+ testTokenList(t, tokenLists["float"])
+}
+
+func TestRealExample(t *testing.T) { // scans a small nested JSON document and checks the full token sequence, ending with EOF
+ complexReal := `
+{
+ "variable": {
+ "foo": {
+ "default": "bar",
+ "description": "bar",
+ "depends_on": ["something"]
+ }
+ }
+}`
+
+ literals := []struct { // expected tokens, in source order
+ tokenType token.Type
+ literal string
+ }{
+ {token.LBRACE, `{`},
+ {token.STRING, `"variable"`},
+ {token.COLON, `:`},
+ {token.LBRACE, `{`},
+ {token.STRING, `"foo"`},
+ {token.COLON, `:`},
+ {token.LBRACE, `{`},
+ {token.STRING, `"default"`},
+ {token.COLON, `:`},
+ {token.STRING, `"bar"`},
+ {token.COMMA, `,`},
+ {token.STRING, `"description"`},
+ {token.COLON, `:`},
+ {token.STRING, `"bar"`},
+ {token.COMMA, `,`},
+ {token.STRING, `"depends_on"`},
+ {token.COLON, `:`},
+ {token.LBRACK, `[`},
+ {token.STRING, `"something"`},
+ {token.RBRACK, `]`},
+ {token.RBRACE, `}`},
+ {token.RBRACE, `}`},
+ {token.RBRACE, `}`},
+ {token.EOF, ``},
+ }
+
+ s := New([]byte(complexReal))
+ for _, l := range literals {
+ tok := s.Scan()
+ if l.tokenType != tok.Type {
+ t.Errorf("got: %s want %s for %s\n", tok, l.tokenType, tok.String())
+ }
+
+ if l.literal != tok.Text {
+ t.Errorf("got: %s want %s\n", tok, l.literal)
+ }
+ }
+
+}
+
+func TestError(t *testing.T) { // each case: source, expected position string of the first error, its message, and the token type Scan returns
+ testError(t, "\x80", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
+ testError(t, "\xff", "1:1", "illegal UTF-8 encoding", token.ILLEGAL)
+
+ testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", token.STRING)
+ testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", token.STRING)
+
+ testError(t, `01238`, "1:7", "numbers cannot start with 0", token.NUMBER)
+ testError(t, `01238123`, "1:10", "numbers cannot start with 0", token.NUMBER)
+ testError(t, `'aa'`, "1:1", "illegal char: '", token.ILLEGAL)
+
+ testError(t, `"`, "1:2", "literal not terminated", token.STRING)
+ testError(t, `"abc`, "1:5", "literal not terminated", token.STRING)
+ testError(t, `"abc`+"\n", "1:5", "literal not terminated", token.STRING)
+}
+
+func testError(t *testing.T, src, pos, msg string, tok token.Type) { // scans src once and checks the first reported error and the returned token type
+ s := New([]byte(src))
+
+ errorCalled := false
+ s.Error = func(p token.Pos, m string) {
+ if !errorCalled { // only the first reported error is compared; later ones are ignored
+ if pos != p.String() {
+ t.Errorf("pos = %q, want %q for %q", p, pos, src)
+ }
+
+ if m != msg {
+ t.Errorf("msg = %q, want %q for %q", m, msg, src)
+ }
+ errorCalled = true
+ }
+ }
+
+ tk := s.Scan()
+ if tk.Type != tok {
+ t.Errorf("tok = %s, want %s for %q", tk, tok, src)
+ }
+ if !errorCalled {
+ t.Errorf("error handler not called for %q", src)
+ }
+ if s.ErrorCount == 0 {
+ t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
+ }
+}
+
+func testTokenList(t *testing.T, tokenList []tokenPair) {
+ // create artificial source code: one token text per line
+ buf := new(bytes.Buffer)
+ for _, ident := range tokenList {
+ fmt.Fprintf(buf, "%s\n", ident.text)
+ }
+
+ s := New(buf.Bytes())
+ for _, ident := range tokenList { // one Scan per expected token; a nil or empty list checks nothing
+ tok := s.Scan()
+ if tok.Type != ident.tok {
+ t.Errorf("tok = %q want %q for %q\n", tok, ident.tok, ident.text)
+ }
+
+ if tok.Text != ident.text {
+ t.Errorf("text = %q want %q", tok.String(), ident.text)
+ }
+
+ }
+}
+
+func countNewlines(s string) int { // returns the number of '\n' runes in s
+ n := 0
+ for _, ch := range s {
+ if ch == '\n' {
+ n++
+ }
+ }
+ return n
+}