From 8bb321f8b032dfaeffbe3d1b8dfeb215c12d3642 Mon Sep 17 00:00:00 2001 From: "Juan J. Martinez" Date: Mon, 18 Jul 2022 07:45:58 +0100 Subject: First public release --- tokenizer/tokenizer_test.go | 310 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 tokenizer/tokenizer_test.go (limited to 'tokenizer/tokenizer_test.go') diff --git a/tokenizer/tokenizer_test.go b/tokenizer/tokenizer_test.go new file mode 100644 index 0000000..47afebb --- /dev/null +++ b/tokenizer/tokenizer_test.go @@ -0,0 +1,310 @@ +package tokenizer + +import ( + "strings" + "testing" + + "usebox.net/lang/tokens" +) + +func TestSkipWhitespace(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader(" \t\r\n")) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Eof { + t.Errorf("Eof expected, got %s", tok) + } + if tok.Loc.Line != 2 { + t.Errorf("line == 2 expected, got %s", tok.Loc) + } + if tok.Loc.Column != 1 { + t.Errorf("column == 1 expected, got %s", tok.Loc) + } +} + +func TestComments(t *testing.T) { + for _, tt := range []struct { + name string + input string + line int + col int + }{ + {"single comment", "// a comment\n", 2, 1}, + {"ignore tokens", "// 1 + 2 is ignored\n", 2, 1}, + {"skip whitespace", " // a comment\n", 2, 1}, + {"comment to eof", "\n// comment to eof", 2, 1}, + {"multiple comments", "// comment\n// another comment", 2, 1}, + {"whitespace before comment", "\t// comment with whitespace\n\t// comment\n", 3, 1}, + {"unicode", "// こんにちは\n", 2, 1}, + } { + t.Run(tt.name, func(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader(tt.input)) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Eof { + t.Errorf("Eof expected, got %s", tok) + } + if tok.Loc.Line != tt.line { + t.Errorf("line == %d expected, got %s", tt.line, tok.Loc) + } + if tok.Loc.Column != tt.col { + t.Errorf("column == %d expected, got %s", tt.col, tok.Loc) + } + }) + } +} + +func TestIdent(t *testing.T) { + for _, tt := range []string{ + "ident", + "MyIdent", + "ident2", + "名前", + "__add", + } { + t.Run(tt, func(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader(tt)) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Ident { + t.Errorf("Ident expected, got %s", tok) + } + if tok.Value != tt { + t.Errorf("value == %s expected, got %s", tt, tok.Value) + } + }) + } +} + +func TestMultipleCalls(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader("// comment\nident // with a comment\nanother\n")) + for _, tt := range []string{ + "ident", "another", + } { + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Ident { + t.Errorf("Ident expected, got %s", tok) + } + if tok.Value != tt { + t.Errorf("value == %s expected, got %s", tt, tok.Value) + } + } +} + +func TestKeywords(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader("true false var const def return number bool string func if else for in continue break")) + for _, tt := range []tokens.TokenId{ + tokens.True, + tokens.False, + tokens.Var, + tokens.Const, + tokens.Def, + tokens.Return, + tokens.TNumber, + tokens.TBool, + tokens.TString, + tokens.TFunc, + tokens.If, + tokens.Else, + tokens.For, + tokens.In, + tokens.Continue, + tokens.Break, + } { + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tt { + t.Errorf("%s expected, got %s", tt, tok) + } + } +} + +func TestNumber(t *testing.T) { + for _, tt := range []string{ + "1234", + "0x4d2", + "0b10011010010", + } { + tzr := NewTokenizer("-", strings.NewReader(tt)) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Number { + t.Errorf("Number expected, got %s", tok) + } + if tok.Value != tt { + t.Errorf("value == %s expected, got '%s'", tt, tok.Value) + } + } +} + +func TestCharacter(t *testing.T) { + for tt, e := range map[string]string{ + "'a'": "a", + "'0'": "0", + "'\\''": "'", + "' '": " ", + "'\\n'": "\n", + "'\\x0d'": "\r", + } { + tzr := NewTokenizer("-", strings.NewReader(tt)) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Char { + t.Errorf("Number expected, got %s", tok) + } + if tok.Value != e { + t.Errorf("value == %s expected, got '%s'", e, tok.Value) + } + } +} + +func TestErrorCharacter(t *testing.T) { + for _, tt := range []string{ + "'12'", + "''", + "'\\'", + "'A", + "'世'", + "'\\x0'", + } { + tzr := NewTokenizer("-", strings.NewReader(tt)) + _, err := tzr.token() + if err == nil { + t.Errorf("expected error, didn't happen (input: %s)", tt) + } + } +} + +func TestString(t *testing.T) { + for tt, v := range map[string]string{ + "\"this is a string\"": "this is a string", + "\"0.1234\"": "0.1234", + "\"\\\"escaped\\\" string\"": "\"escaped\" string", + "\"\\n\\x0d\\t\"": "\n\r\t", + "\"Multiline\\nstring\"": "Multiline\nstring", + } { + t.Run(tt, func(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader(tt)) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.String { + t.Errorf("String expected, got \"%s\"", tok) + } + if tok.Value != v { + t.Errorf("value == %s expected, got \"%s\"", tt, tok.Value) + } + }) + } +} + +func TestErrorStrnig(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader("\"string and EOF")) + _, err := tzr.token() + if err == nil { + t.Errorf("expected error, didn't happen") + } +} + +func TestSingleChar(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader("{ } ( ) [ ] ; , + - * % / . = > < ! ~ | & ^ ?")) + for _, tt := range []tokens.TokenId{ + tokens.LBrace, + tokens.RBrace, + tokens.LParen, + tokens.RParen, + tokens.LBracket, + tokens.RBracket, + tokens.Semicolon, + tokens.Comma, + tokens.Add, + tokens.Sub, + tokens.Mul, + tokens.Mod, + tokens.Div, + tokens.Dot, + tokens.Assign, + tokens.Gt, + tokens.Lt, + tokens.Not, + tokens.Neg, + tokens.BitOr, + tokens.BitAnd, + tokens.BitXor, + tokens.TestE, + } { + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tt { + t.Errorf("%s expected, got %s", tt, tok) + } + } +} + +func TestDoubleChar(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader("|| && == != >= <= >> << !?")) + for _, tt := range []tokens.TokenId{ + tokens.Or, + tokens.And, + tokens.Eq, + tokens.Ne, + tokens.Ge, + tokens.Le, + tokens.BitShr, + tokens.BitShl, + tokens.TagE, + } { + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tt { + t.Errorf("%s expected, got %s", tt, tok) + } + } +} + +func TestScan(t *testing.T) { + for _, tt := range []struct { + name string + input string + ntokens int + }{ + {"single line", "1 + 2", 4}, + {"multiple lines", "1 + 2\nident", 5}, + {"line starts with whitespace", "1 + 2\n\tident", 5}, + } { + t.Run(tt.name, func(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader(tt.input)) + ts, err := tzr.Scan() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if len(ts) != tt.ntokens { + t.Errorf("%d tokens expected, got %d", tt.ntokens, len(ts)) + } + last := ts[len(ts)-1] + if last.Id != tokens.Eof { + t.Errorf("last token expected to be Eof, got %s", last) + } + }) + } +} -- cgit v1.2.3