aboutsummaryrefslogtreecommitdiff
path: root/tokenizer/tokenizer_test.go
diff options
context:
space:
mode:
author Juan J. Martinez <jjm@usebox.net> 2022-07-18 07:45:58 +0100
committer Juan J. Martinez <jjm@usebox.net> 2022-07-18 07:45:58 +0100
commit 8bb321f8b032dfaeffbe3d1b8dfeb215c12d3642 (patch)
tree c53977d1284347bb1d5963ddb4dc7723c40c6e55 /tokenizer/tokenizer_test.go
download micro-lang-8bb321f8b032dfaeffbe3d1b8dfeb215c12d3642.tar.gz
micro-lang-8bb321f8b032dfaeffbe3d1b8dfeb215c12d3642.zip
First public release
Diffstat (limited to 'tokenizer/tokenizer_test.go')
-rw-r--r-- tokenizer/tokenizer_test.go 310
1 files changed, 310 insertions, 0 deletions
diff --git a/tokenizer/tokenizer_test.go b/tokenizer/tokenizer_test.go
new file mode 100644
index 0000000..47afebb
--- /dev/null
+++ b/tokenizer/tokenizer_test.go
@@ -0,0 +1,310 @@
+package tokenizer
+
+import (
+ "strings"
+ "testing"
+
+ "usebox.net/lang/tokens"
+)
+
+// TestSkipWhitespace feeds the tokenizer an input made only of whitespace
+// (space, tab, CR, LF) and expects the first token to be Eof, reported at
+// line 2, column 1.
+func TestSkipWhitespace(t *testing.T) {
+	src := strings.NewReader(" \t\r\n")
+	tz := NewTokenizer("-", src)
+
+	got, err := tz.token()
+	if err != nil {
+		t.Errorf("unexpected error %s", err)
+	}
+	if tokens.Eof != got.Id {
+		t.Errorf("Eof expected, got %s", got)
+	}
+	if line := got.Loc.Line; line != 2 {
+		t.Errorf("line == 2 expected, got %s", got.Loc)
+	}
+	if col := got.Loc.Column; col != 1 {
+		t.Errorf("column == 1 expected, got %s", got.Loc)
+	}
+}
+
+func TestComments(t *testing.T) {
+ for _, tt := range []struct {
+ name string
+ input string
+ line int
+ col int
+ }{
+ {"single comment", "// a comment\n", 2, 1},
+ {"ignore tokens", "// 1 + 2 is ignored\n", 2, 1},
+ {"skip whitespace", " // a comment\n", 2, 1},
+ {"comment to eof", "\n// comment to eof", 2, 1},
+ {"multiple comments", "// comment\n// another comment", 2, 1},
+ {"whitespace before comment", "\t// comment with whitespace\n\t// comment\n", 3, 1},
+ {"unicode", "// こんにちは\n", 2, 1},
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader(tt.input))
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tokens.Eof {
+ t.Errorf("Eof expected, got %s", tok)
+ }
+ if tok.Loc.Line != tt.line {
+ t.Errorf("line == %d expected, got %s", tt.line, tok.Loc)
+ }
+ if tok.Loc.Column != tt.col {
+ t.Errorf("column == %d expected, got %s", tt.col, tok.Loc)
+ }
+ })
+ }
+}
+
+// TestIdent checks that each sample identifier (ASCII, mixed case, with
+// digits, non-ASCII, and leading underscores) comes back as a single Ident
+// token whose value equals the input text. One subtest runs per identifier.
+func TestIdent(t *testing.T) {
+	idents := []string{
+		"ident",
+		"MyIdent",
+		"ident2",
+		"名前",
+		"__add",
+	}
+
+	for _, name := range idents {
+		t.Run(name, func(t *testing.T) {
+			tz := NewTokenizer("-", strings.NewReader(name))
+			got, err := tz.token()
+			if err != nil {
+				t.Errorf("unexpected error %s", err)
+			}
+			if got.Id != tokens.Ident {
+				t.Errorf("Ident expected, got %s", got)
+			}
+			if got.Value != name {
+				t.Errorf("value == %s expected, got %s", name, got.Value)
+			}
+		})
+	}
+}
+
+func TestMultipleCalls(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader("// comment\nident // with a comment\nanother\n"))
+ for _, tt := range []string{
+ "ident", "another",
+ } {
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tokens.Ident {
+ t.Errorf("Ident expected, got %s", tok)
+ }
+ if tok.Value != tt {
+ t.Errorf("value == %s expected, got %s", tt, tok.Value)
+ }
+ }
+}
+
+func TestKeywords(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader("true false var const def return number bool string func if else for in continue break"))
+ for _, tt := range []tokens.TokenId{
+ tokens.True,
+ tokens.False,
+ tokens.Var,
+ tokens.Const,
+ tokens.Def,
+ tokens.Return,
+ tokens.TNumber,
+ tokens.TBool,
+ tokens.TString,
+ tokens.TFunc,
+ tokens.If,
+ tokens.Else,
+ tokens.For,
+ tokens.In,
+ tokens.Continue,
+ tokens.Break,
+ } {
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tt {
+ t.Errorf("%s expected, got %s", tt, tok)
+ }
+ }
+}
+
+// TestNumber checks that decimal, hexadecimal (0x) and binary (0b) literals
+// are each returned as a single Number token whose value is the literal
+// text unchanged.
+//
+// Each literal runs as its own subtest, matching TestIdent and TestString,
+// so a failure report names the offending input.
+func TestNumber(t *testing.T) {
+	for _, tt := range []string{
+		"1234",
+		"0x4d2",
+		"0b10011010010",
+	} {
+		t.Run(tt, func(t *testing.T) {
+			tzr := NewTokenizer("-", strings.NewReader(tt))
+			tok, err := tzr.token()
+			if err != nil {
+				// The token carries no meaning once token() has failed,
+				// so stop this case instead of inspecting it.
+				t.Fatalf("unexpected error %s", err)
+			}
+			if tok.Id != tokens.Number {
+				t.Errorf("Number expected, got %s", tok)
+			}
+			if tok.Value != tt {
+				t.Errorf("value == %s expected, got '%s'", tt, tok.Value)
+			}
+		})
+	}
+}
+
+// TestCharacter checks that character literals, including escaped quotes,
+// "\n" and "\xNN" escapes, are returned as a Char token whose value is the
+// decoded character.
+//
+// The cases live in a map, so iteration order is random; each one runs as a
+// named subtest so a failure can be traced back to its input.
+func TestCharacter(t *testing.T) {
+	for tt, e := range map[string]string{
+		"'a'":      "a",
+		"'0'":      "0",
+		"'\\''":    "'",
+		"' '":      " ",
+		"'\\n'":    "\n",
+		"'\\x0d'":  "\r",
+	} {
+		t.Run(tt, func(t *testing.T) {
+			tzr := NewTokenizer("-", strings.NewReader(tt))
+			tok, err := tzr.token()
+			if err != nil {
+				// The token carries no meaning once token() has failed.
+				t.Fatalf("unexpected error %s", err)
+			}
+			if tok.Id != tokens.Char {
+				t.Errorf("Char expected, got %s", tok)
+			}
+			if tok.Value != e {
+				// %q keeps control characters such as "\n" and "\r"
+				// readable in the failure output.
+				t.Errorf("value == %q expected, got %q", e, tok.Value)
+			}
+		})
+	}
+}
+
+// TestErrorCharacter feeds the tokenizer a set of malformed character
+// literals and expects token() to report an error for every one of them.
+func TestErrorCharacter(t *testing.T) {
+	malformed := []string{
+		"'12'",
+		"''",
+		"'\\'",
+		"'A",
+		"'世'",
+		"'\\x0'",
+	}
+
+	for _, input := range malformed {
+		tz := NewTokenizer("-", strings.NewReader(input))
+		if _, err := tz.token(); err == nil {
+			t.Errorf("expected error, didn't happen (input: %s)", input)
+		}
+	}
+}
+
+// TestString checks that double-quoted string literals, including escaped
+// quotes and "\n", "\t" and "\xNN" escapes, are returned as a String token
+// whose value is the decoded text. Map keys are the raw source text and map
+// values are the expected decoded strings.
+func TestString(t *testing.T) {
+	for tt, v := range map[string]string{
+		"\"this is a string\"":        "this is a string",
+		"\"0.1234\"":                  "0.1234",
+		"\"\\\"escaped\\\" string\"":  "\"escaped\" string",
+		"\"\\n\\x0d\\t\"":             "\n\r\t",
+		"\"Multiline\\nstring\"":      "Multiline\nstring",
+	} {
+		t.Run(tt, func(t *testing.T) {
+			tzr := NewTokenizer("-", strings.NewReader(tt))
+			tok, err := tzr.token()
+			if err != nil {
+				// The token carries no meaning once token() has failed.
+				t.Fatalf("unexpected error %s", err)
+			}
+			if tok.Id != tokens.String {
+				t.Errorf("String expected, got \"%s\"", tok)
+			}
+			if tok.Value != v {
+				// Report the expected decoded value (v), not the raw
+				// input (tt); %q keeps control characters readable.
+				t.Errorf("value == %q expected, got %q", v, tok.Value)
+			}
+		})
+	}
+}
+
+func TestErrorStrnig(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader("\"string and EOF"))
+ _, err := tzr.token()
+ if err == nil {
+ t.Errorf("expected error, didn't happen")
+ }
+}
+
+func TestSingleChar(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader("{ } ( ) [ ] ; , + - * % / . = > < ! ~ | & ^ ?"))
+ for _, tt := range []tokens.TokenId{
+ tokens.LBrace,
+ tokens.RBrace,
+ tokens.LParen,
+ tokens.RParen,
+ tokens.LBracket,
+ tokens.RBracket,
+ tokens.Semicolon,
+ tokens.Comma,
+ tokens.Add,
+ tokens.Sub,
+ tokens.Mul,
+ tokens.Mod,
+ tokens.Div,
+ tokens.Dot,
+ tokens.Assign,
+ tokens.Gt,
+ tokens.Lt,
+ tokens.Not,
+ tokens.Neg,
+ tokens.BitOr,
+ tokens.BitAnd,
+ tokens.BitXor,
+ tokens.TestE,
+ } {
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tt {
+ t.Errorf("%s expected, got %s", tt, tok)
+ }
+ }
+}
+
+func TestDoubleChar(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader("|| && == != >= <= >> << !?"))
+ for _, tt := range []tokens.TokenId{
+ tokens.Or,
+ tokens.And,
+ tokens.Eq,
+ tokens.Ne,
+ tokens.Ge,
+ tokens.Le,
+ tokens.BitShr,
+ tokens.BitShl,
+ tokens.TagE,
+ } {
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tt {
+ t.Errorf("%s expected, got %s", tt, tok)
+ }
+ }
+}
+
+// TestScan runs Scan over short inputs and checks both the number of tokens
+// returned and that the final token is Eof. Judging by the cases, the
+// expected counts include that trailing Eof ("1 + 2" expects 4 tokens).
+func TestScan(t *testing.T) {
+	for _, tt := range []struct {
+		name    string
+		input   string
+		ntokens int
+	}{
+		{"single line", "1 + 2", 4},
+		{"multiple lines", "1 + 2\nident", 5},
+		{"line starts with whitespace", "1 + 2\n\tident", 5},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			tzr := NewTokenizer("-", strings.NewReader(tt.input))
+			ts, err := tzr.Scan()
+			if err != nil {
+				// The token slice carries no meaning after a failed scan.
+				t.Fatalf("unexpected error %s", err)
+			}
+			if len(ts) != tt.ntokens {
+				t.Errorf("%d tokens expected, got %d", tt.ntokens, len(ts))
+			}
+			// Guard the index below: an empty result would otherwise panic
+			// with an out-of-range index instead of failing the test.
+			if len(ts) == 0 {
+				t.Fatal("last token expected to be Eof, got no tokens")
+			}
+			last := ts[len(ts)-1]
+			if last.Id != tokens.Eof {
+				t.Errorf("last token expected to be Eof, got %s", last)
+			}
+		})
+	}
+}