package tokenizer

import (
	"strings"
	"testing"

	"usebox.net/micro-lang/tokens"
)

// TestSkipWhitespace checks that input made only of whitespace yields an Eof
// token whose location is line 2, column 1 (the input contains one newline).
func TestSkipWhitespace(t *testing.T) {
	tzr := NewTokenizer("-", strings.NewReader(" \t\r\n"))

	tok, err := tzr.token()
	if err != nil {
		// The token is meaningless on error, so stop instead of inspecting it.
		t.Fatalf("unexpected error %s", err)
	}
	if tok.Id != tokens.Eof {
		t.Errorf("Eof expected, got %s", tok)
	}
	if tok.Loc.Line != 2 {
		t.Errorf("line == 2 expected, got %s", tok.Loc)
	}
	if tok.Loc.Column != 1 {
		t.Errorf("column == 1 expected, got %s", tok.Loc)
	}
}

// TestComments checks that inputs containing only comments (and whitespace)
// yield an Eof token at the expected line and column.
func TestComments(t *testing.T) {
	for _, tt := range []struct {
		name  string
		input string
		line  int
		col   int
	}{
		{"single comment", "// a comment\n", 2, 1},
		{"ignore tokens", "// 1 + 2 is ignored\n", 2, 1},
		{"skip whitespace", " // a comment\n", 2, 1},
		{"comment to eof", "\n// comment to eof", 2, 1},
		{"multiple comments", "// comment\n// another comment", 2, 1},
		{"whitespace before comment", "\t// comment with whitespace\n\t// comment\n", 3, 1},
		{"unicode", "// こんにちは\n", 2, 1},
	} {
		t.Run(tt.name, func(t *testing.T) {
			tzr := NewTokenizer("-", strings.NewReader(tt.input))

			tok, err := tzr.token()
			if err != nil {
				t.Fatalf("unexpected error %s", err)
			}
			if tok.Id != tokens.Eof {
				t.Errorf("Eof expected, got %s", tok)
			}
			if tok.Loc.Line != tt.line {
				t.Errorf("line == %d expected, got %s", tt.line, tok.Loc)
			}
			if tok.Loc.Column != tt.col {
				t.Errorf("column == %d expected, got %s", tt.col, tok.Loc)
			}
		})
	}
}

// TestIdent checks that each input is tokenized as a single Ident token whose
// value equals the input text.
func TestIdent(t *testing.T) {
	for _, tt := range []string{
		"ident",
		"MyIdent",
		"ident2",
		"名前",
		"__add",
	} {
		t.Run(tt, func(t *testing.T) {
			tzr := NewTokenizer("-", strings.NewReader(tt))

			tok, err := tzr.token()
			if err != nil {
				t.Fatalf("unexpected error %s", err)
			}
			if tok.Id != tokens.Ident {
				t.Errorf("Ident expected, got %s", tok)
			}
			if tok.Value != tt {
				t.Errorf("value == %s expected, got %s", tt, tok.Value)
			}
		})
	}
}

// TestMultipleCalls checks that successive token() calls on the same tokenizer
// return the identifiers in order, skipping the comments between them.
func TestMultipleCalls(t *testing.T) {
	tzr := NewTokenizer("-", strings.NewReader("// comment\nident // with a comment\nanother\n"))

	for _, tt := range []string{
		"ident",
		"another",
	} {
		tok, err := tzr.token()
		if err != nil {
			t.Fatalf("unexpected error %s", err)
		}
		if tok.Id != tokens.Ident {
			t.Errorf("Ident expected, got %s", tok)
		}
		if tok.Value != tt {
			t.Errorf("value == %s expected, got %s", tt, tok.Value)
		}
	}
}

// TestKeywords checks that each keyword in the input is returned with its own
// token id, in the order the keywords appear.
func TestKeywords(t *testing.T) {
	tzr := NewTokenizer("-", strings.NewReader("true false var const def return number bool string func if else for in continue break"))

	for _, tt := range []tokens.TokenId{
		tokens.True,
		tokens.False,
		tokens.Var,
		tokens.Const,
		tokens.Def,
		tokens.Return,
		tokens.TNumber,
		tokens.TBool,
		tokens.TString,
		tokens.TFunc,
		tokens.If,
		tokens.Else,
		tokens.For,
		tokens.In,
		tokens.Continue,
		tokens.Break,
	} {
		tok, err := tzr.token()
		if err != nil {
			t.Fatalf("unexpected error %s", err)
		}
		if tok.Id != tt {
			t.Errorf("%s expected, got %s", tt, tok)
		}
	}
}

// TestNumber checks that decimal, hexadecimal and binary literals are returned
// as Number tokens whose value is the literal text as written.
func TestNumber(t *testing.T) {
	for _, tt := range []string{
		"1234",
		"0x4d2",
		"0b10011010010",
	} {
		// One subtest per literal so a failure names the offending input.
		t.Run(tt, func(t *testing.T) {
			tzr := NewTokenizer("-", strings.NewReader(tt))

			tok, err := tzr.token()
			if err != nil {
				t.Fatalf("unexpected error %s", err)
			}
			if tok.Id != tokens.Number {
				t.Errorf("Number expected, got %s", tok)
			}
			if tok.Value != tt {
				t.Errorf("value == %s expected, got '%s'", tt, tok.Value)
			}
		})
	}
}

// TestCharacter checks that character literals, including escaped ones, are
// returned as Char tokens whose value is the decoded character.
func TestCharacter(t *testing.T) {
	for tt, e := range map[string]string{
		"'a'":     "a",
		"'0'":     "0",
		"'\\''":   "'",
		"' '":     " ",
		"'\\n'":   "\n",
		"'\\x0d'": "\r",
	} {
		// Map iteration order is random; subtests keep failures attributable.
		t.Run(tt, func(t *testing.T) {
			tzr := NewTokenizer("-", strings.NewReader(tt))

			tok, err := tzr.token()
			if err != nil {
				t.Fatalf("unexpected error %s", err)
			}
			if tok.Id != tokens.Char {
				t.Errorf("Char expected, got %s", tok)
			}
			if tok.Value != e {
				t.Errorf("value == %s expected, got '%s'", e, tok.Value)
			}
		})
	}
}

// TestErrorCharacter checks that each malformed character literal makes
// token() return an error.
func TestErrorCharacter(t *testing.T) {
	for _, tt := range []string{
		"'12'",
		"''",
		"'\\'",
		"'A",
		"'世'",
		"'\\x0'",
	} {
		t.Run(tt, func(t *testing.T) {
			tzr := NewTokenizer("-", strings.NewReader(tt))

			if _, err := tzr.token(); err == nil {
				t.Errorf("expected error, didn't happen (input: %s)", tt)
			}
		})
	}
}

// TestString checks that string literals are returned as String tokens whose
// value has the surrounding quotes removed and escape sequences decoded.
func TestString(t *testing.T) {
	for tt, v := range map[string]string{
		"\"this is a string\"":         "this is a string",
		"\"0.1234\"":                   "0.1234",
		"\"\\\"escaped\\\" string\"":   "\"escaped\" string",
		"\"\\n\\x0d\\t\"":              "\n\r\t",
		"\"Multiline\\nstring\"":       "Multiline\nstring",
	} {
		t.Run(tt, func(t *testing.T) {
			tzr := NewTokenizer("-", strings.NewReader(tt))

			tok, err := tzr.token()
			if err != nil {
				t.Fatalf("unexpected error %s", err)
			}
			if tok.Id != tokens.String {
				t.Errorf("String expected, got \"%s\"", tok)
			}
			if tok.Value != v {
				// Report the expected decoded value, not the raw input.
				t.Errorf("value == %s expected, got \"%s\"", v, tok.Value)
			}
		})
	}
}

// TestErrorStrnig checks that a string literal left unterminated at end of
// input makes token() return an error.
//
// The misspelled name is kept so existing `go test -run` filters keep matching.
func TestErrorStrnig(t *testing.T) {
	tzr := NewTokenizer("-", strings.NewReader("\"string and EOF"))

	if _, err := tzr.token(); err == nil {
		t.Errorf("expected error, didn't happen")
	}
}

// TestSingleChar checks that each single-character punctuation or operator in
// the input is returned with the expected token id, in order.
func TestSingleChar(t *testing.T) {
	tzr := NewTokenizer("-", strings.NewReader("{ } ( ) [ ] ; , + - * % / . = > < ! ~ | & ^ ?"))

	for _, tt := range []tokens.TokenId{
		tokens.LBrace,
		tokens.RBrace,
		tokens.LParen,
		tokens.RParen,
		tokens.LBracket,
		tokens.RBracket,
		tokens.Semicolon,
		tokens.Comma,
		tokens.Add,
		tokens.Sub,
		tokens.Mul,
		tokens.Mod,
		tokens.Div,
		tokens.Dot,
		tokens.Assign,
		tokens.Gt,
		tokens.Lt,
		tokens.Not,
		tokens.Neg,
		tokens.BitOr,
		tokens.BitAnd,
		tokens.BitXor,
		tokens.TestE,
	} {
		tok, err := tzr.token()
		if err != nil {
			t.Fatalf("unexpected error %s", err)
		}
		if tok.Id != tt {
			t.Errorf("%s expected, got %s", tt, tok)
		}
	}
}

// TestDoubleChar checks that each two-character operator in the input is
// returned as a single token with the expected id, in order.
func TestDoubleChar(t *testing.T) {
	tzr := NewTokenizer("-", strings.NewReader("|| && == != >= <= >> << !?"))

	for _, tt := range []tokens.TokenId{
		tokens.Or,
		tokens.And,
		tokens.Eq,
		tokens.Ne,
		tokens.Ge,
		tokens.Le,
		tokens.BitShr,
		tokens.BitShl,
		tokens.TagE,
	} {
		tok, err := tzr.token()
		if err != nil {
			t.Fatalf("unexpected error %s", err)
		}
		if tok.Id != tt {
			t.Errorf("%s expected, got %s", tt, tok)
		}
	}
}

// TestScan checks that Scan returns the expected number of tokens for each
// input and that the last token is Eof.
func TestScan(t *testing.T) {
	for _, tt := range []struct {
		name    string
		input   string
		ntokens int
	}{
		{"single line", "1 + 2", 4},
		{"multiple lines", "1 + 2\nident", 5},
		{"line starts with whitespace", "1 + 2\n\tident", 5},
	} {
		t.Run(tt.name, func(t *testing.T) {
			tzr := NewTokenizer("-", strings.NewReader(tt.input))

			ts, err := tzr.Scan()
			if err != nil {
				t.Fatalf("unexpected error %s", err)
			}
			if len(ts) != tt.ntokens {
				t.Errorf("%d tokens expected, got %d", tt.ntokens, len(ts))
			}
			// Guard the index below: an empty result would otherwise panic.
			if len(ts) == 0 {
				t.Fatal("last token expected to be Eof, got no tokens")
			}

			last := ts[len(ts)-1]
			if last.Id != tokens.Eof {
				t.Errorf("last token expected to be Eof, got %s", last)
			}
		})
	}
}