From 8bb321f8b032dfaeffbe3d1b8dfeb215c12d3642 Mon Sep 17 00:00:00 2001 From: "Juan J. Martinez" Date: Mon, 18 Jul 2022 07:45:58 +0100 Subject: First public release --- .gitignore | 6 + COPYING | 21 + Makefile | 38 ++ README.md | 58 ++ TODO.md | 90 +++ ast/ast.go | 454 +++++++++++++++ cmd/micro/micro.go | 202 +++++++ docs/tour.md | 488 ++++++++++++++++ errors/errors.go | 106 ++++ go.mod | 3 + grammar.md | 65 +++ interpreter/callable.go | 227 ++++++++ interpreter/interpreter.go | 801 +++++++++++++++++++++++++++ interpreter/interpreter_test.go | 754 +++++++++++++++++++++++++ micro-vim/ftdetect/micro.vim | 3 + micro-vim/ftplugin/micro.vim | 26 + micro-vim/syntax/micro.vim | 90 +++ parser/parser.go | 1168 +++++++++++++++++++++++++++++++++++++++ parser/parser_test.go | 250 +++++++++ tokenizer/tokenizer.go | 356 ++++++++++++ tokenizer/tokenizer_test.go | 310 +++++++++++ tokens/tokens.go | 186 +++++++ 22 files changed, 5702 insertions(+) create mode 100644 .gitignore create mode 100644 COPYING create mode 100644 Makefile create mode 100644 README.md create mode 100644 TODO.md create mode 100644 ast/ast.go create mode 100644 cmd/micro/micro.go create mode 100644 docs/tour.md create mode 100644 errors/errors.go create mode 100644 go.mod create mode 100644 grammar.md create mode 100644 interpreter/callable.go create mode 100644 interpreter/interpreter.go create mode 100644 interpreter/interpreter_test.go create mode 100644 micro-vim/ftdetect/micro.vim create mode 100644 micro-vim/ftplugin/micro.vim create mode 100644 micro-vim/syntax/micro.vim create mode 100644 parser/parser.go create mode 100644 parser/parser_test.go create mode 100644 tokenizer/tokenizer.go create mode 100644 tokenizer/tokenizer_test.go create mode 100644 tokens/tokens.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..04863c4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.swp +*~ +*.out +*.test +bin/ +input.micro diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..3271d4a --- /dev/null +++ b/COPYING @@ -0,0 +1,21 @@ +Micro, a toy programming language +Copyright (C) 2022 by Juan J. Martinez + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..74532be --- /dev/null +++ b/Makefile @@ -0,0 +1,38 @@ +MAIN := cmd/micro/micro.go +NAME := micro +GO_SOURCES := $(shell find . -name '*.go') +VERSION_STRING := $(shell git describe --exact-match --tags --always HEAD 2>/dev/null || echo git-`git rev-parse --short=6 HEAD 2>/dev/null` || echo unknown) +GOBUILD_FLAGS := -ldflags "-X main.Version=$(VERSION_STRING)" -mod=mod +PACKAGES := $(shell go list ./...) + +.PHONY: clean run parse bindir + +build: bin/$(NAME) + +build-win: bin/$(NAME).exe + +run: build + bin/$(NAME) input.micro + +parse: build + bin/$(NAME) -parse input.micro + +vet: + go vet $(PACKAGES) + +bin/$(NAME): bindir $(GO_SOURCES) + GOBIN="$(CURDIR)/bin" go build -o bin/$(NAME) $(GOBUILD_FLAGS) $(MAIN) + +bin/$(NAME).exe: bindir $(GO_SOURCES) + GOOS=windows GOARCH=386 CGO_ENABLED=1 CXX=i686-w64-mingw32-g++ CC=i686-w64-mingw32-gcc GOBIN="$(CURDIR)/bin" go build $(GOBUILD_FLAGS) -o bin/$(NAME).exe $(MAIN) + +GO_TEST_FLAGS ?= "" +test: + go test ./... -v $(GO_TEST_FLAGS) + +bindir: + mkdir -p ./bin + +clean: + rm -f bin/$(NAME) bin/$(NAME).exe + diff --git a/README.md b/README.md new file mode 100644 index 0000000..14b1170 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +# Micro Programming Language + +Micro is a small statically typed toy programming language. + +Objectives: + +* Learn and have fun +* Build an interpreter +* (may be) Build a cross-compiler targeting 8-bit microcomputers (Z80 CPU) + * Easy to interact with existing libraries written in other languages +* Fast compilation (compiler) / start-up time (interpreter) +* Micro should be statically typed, small but useful, and reasonably performant (including on the target CPU) + +## Current status + +There is a tree-walk interpreter built in Go mostly following the most excellent [Crafting Interpreters](https://craftinginterpreters.com/) book. It isn't fast, but it can be useful to test code and play around. + +A compiler targeting the Z80 CPU was planned, and that is influencing what is available on the interpreter. Unfortunately this has proven to be more than I can currently tackle, so I'm releasing the interpreter "as is". + +There's a plugin to provide syntax highlighting in vim. The proposed extension for Micro source code is `.micro` or `.cro`. + +### Running the interpreter + +Run a script with `micro file.micro`, or start the REPL with `micro`: +``` +Welcome to Micro +Type in expressions for evaluation, or :q to exit. + +micro> println("Hello micro!"); +Hello micro! +13 +micro> :q +``` + +Also run `micro -h` for the CLI help. + +An example of a program: + +```micro +def fib(n number) number { + if n < 2 { + return n; + } else { + return fib(n - 1) + fib(n - 2); + } +} + +println(fib(20)); +// output: 6765 +``` +Check [the tour](docs/tour.md) for features and details about the language. + +You can also read [the grammar](grammar.md). + +## Copying + +This software is distributed under MIT license, unless stated otherwise. + diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..9203907 --- /dev/null +++ b/TODO.md @@ -0,0 +1,90 @@ +# Numeric types + +Instead of "number", integer: + + * i8, i16: signed + * u8, u16: unsigned + +Operations: + + * x.lo and x.hi are r/w properties to get/set low/high bytes in 16-bit numbers + * i8(x), u8(x), i16(x), u16(x) for conversions (there are no implicit conversions) + +Consequences: + + * array index is either u8 or u16 + +# Strings + +Strings are zero ended arrays of u8 that can be represented as a literal using double quotes. + +```micro +// 6 characters plus a 0 at the end +var s [7]u8 = "string"; + +// change value; length <= array size +s = "value"; + +s = "another"; +// error: string is too long for [7]u8 +``` + +Notes: + + * "xxx" + 0 to initialize an array of u8 + * can be used to change the value + * the string can be shorter than the array space, but length + 1 must fit in the array + +The interpreter built-in `println` will print literals as strings, but in order to output an array of u8 as a string, use `str` property: + +The following escape sequences are supported in both strings and character literals: + + * In string literals `\"` for a double quote. + * In character literals `\'` for a single quote. + * `\n` for end of line. + * `\t` for tab. + * `\x00` for a 2 digits hex number (8-bit). + * `\\` for the backslash itself. + +```micro +// the array is initialized to zeroes +var s [10]u8; + +s[0] = 'H'; +s[1] = 'e'; +s[2] = 'l'; +s[3] = 'l'; +s[4] = 'o'; + +println(s.str); +// output: Hello +``` + +# Constants and expression evaluation + +The parser should resolve most constants and expression evaluation. + +e.g. `a = 1 + 2` -> `a = 3` + +Constant as literal replacement (constants MUST be literals after expression evaluation). + +Built-in `len` should be resolved at parsing time (applies to array size and that is static). + +# Remove closures + + * why: they are dynamic + * can we have a limited closure? + +# Directives + +Include a file in current source and it will interpreted / compiled. + +```micro +include "file.micro" +``` + +Include a binary file as an expression list to initialize an array. + +```micro +var arr [64]number = incbin "file.micro"; +``` diff --git a/ast/ast.go b/ast/ast.go new file mode 100644 index 0000000..a654e4b --- /dev/null +++ b/ast/ast.go @@ -0,0 +1,454 @@ +package ast + +import ( + "encoding/json" + "fmt" + "strings" + + "usebox.net/lang/tokens" +) + +// for JSON +type node struct { + Type string `json:"type"` + Object any `json:"object"` +} + +var ( + TypeNumber = Type{Value: tokens.Token{Id: tokens.TNumber}, Const: true} + TypeBool = Type{Value: tokens.Token{Id: tokens.TBool}, Const: true} + TypeString = Type{Value: tokens.Token{Id: tokens.TString}, Const: true} + TypeArray = Type{Value: tokens.Token{Id: tokens.TArray}, Const: true} + TypeNone = Type{Value: tokens.Token{Id: tokens.None}, Const: true} +) + +type Type struct { + Value tokens.Token `json:"value"` + Params []Type `json:"params,omitempty"` + Ret *Type `json:"type,omitempty"` + Len *int `json:"len,omitempty"` + Const bool `json:"const"` + Inst bool `json:"-"` + Ref bool `json:"-"` +} + +func NewStructType(name tokens.Token) *Type { + return &Type{Value: tokens.Token{Id: tokens.TStruct, Value: name.Value, Loc: name.Loc}} +} + +func NewFuncType(name tokens.Token, params []Type, ret *Type) *Type { + return &Type{Value: tokens.Token{Id: tokens.TFunc, Loc: name.Loc}, Params: params, Ret: ret} +} + +func (t Type) MarshalJSON() ([]byte, error) { + type Object Type + return json.MarshalIndent(node{"type", Object(t)}, "", " ") +} + +func (t Type) String() string { + if t.Value.Id == tokens.TFunc { + var b = strings.Builder{} + b.WriteString(t.Value.Id.String()) + if t.Params == nil { + // not specified (e.g. println) + b.WriteString(" (...)") + } else if len(t.Params) > 0 { + b.WriteString(" (") + for n, param := range t.Params { + b.WriteString(param.String()) + if n < len(t.Params)-1 { + b.WriteString(", ") + } + } + b.WriteString(")") + } else { + b.WriteString(" ()") + } + + if t.Ret != nil { + b.WriteString(" ") + b.WriteString(t.Ret.String()) + } + + return b.String() + } + + if t.Value.Id == tokens.TStruct { + return fmt.Sprintf("struct %s", t.Value.Value) + } + + if t.Value.Id == tokens.TArray { + if t.Ret != nil { + return fmt.Sprintf("array [%d]%s", *t.Len, t.Ret.String()) + } + return "array" + } + + return t.Value.Id.String() +} + +func (t Type) Compare(other *Type) bool { + if other == nil { + return false + } + if t.Value.Id != other.Value.Id { + return false + } + + if t.Value.Id == tokens.TArray { + // this is len accepting any length of array + if t.Len == nil || other.Len == nil { + return true + } + return *t.Len == *other.Len + } + + if t.Value.Id == tokens.TStruct { + return t.Value.Value == other.Value.Value + } + + if t.Value.Id == tokens.TFunc { + // func checks + if len(t.Params) != len(other.Params) { + return false + } + for i := range t.Params { + if !t.Params[i].Compare(&other.Params[i]) { + return false + } + } + if t.Ret != nil { + return t.Ret.Compare(other.Ret) + } + return t.Ret == other.Ret + } + + return true +} + +func (t Type) ResolveConst() bool { + if t.Value.Id == tokens.TArray { + return t.Ret.Const + } + return t.Const +} + +type Expr interface { + Resolve() *Type +} + +type ExprList struct { + Exprs []Expr `json:"exprs"` +} + +func (n ExprList) MarshalJSON() ([]byte, error) { + type Object ExprList + return json.MarshalIndent(node{"exprlist", Object(n)}, "", " ") +} + +func (n ExprList) Resolve() *Type { + return n.Exprs[0].Resolve() +} + +type Var struct { + Name tokens.Token `json:"ident"` + Type Type `json:"type"` + Initv Expr `json:"initval,omitempty"` +} + +func (n Var) MarshalJSON() ([]byte, error) { + type Object Var + return json.MarshalIndent(node{"var", Object(n)}, "", " ") +} + +func (n Var) Resolve() *Type { + return &n.Type +} + +type Variable struct { + Name tokens.Token `json:"ident"` + Index Expr `json:"index,omitempty"` + Type Type `json:"-"` +} + +func (n Variable) MarshalJSON() ([]byte, error) { + type Object Variable + return json.MarshalIndent(node{"variable", Object(n)}, "", " ") +} + +func (n Variable) Resolve() *Type { + if n.Type.Value.Id == tokens.TArray && n.Index != nil { + return n.Type.Ret + } + return &n.Type +} + +func (n Variable) String() string { + if n.Index != nil { + return fmt.Sprintf("%s[%s]", n.Name.Value, n.Index.Resolve().Value) + } + if n.Type.Value.Id == tokens.TStruct { + return fmt.Sprintf(".%s", n.Type.Value.Value, n.Name.Value) + } + return n.Name.String() +} + +type Assign struct { + Left Expr `json:"left"` + Right Expr `json:"right"` + Loc tokens.Location `json:"-"` +} + +func (n Assign) MarshalJSON() ([]byte, error) { + type Object Assign + return json.MarshalIndent(node{"variable", Object(n)}, "", " ") +} + +func (n Assign) Resolve() *Type { + return n.Left.Resolve() +} + +type Binary struct { + Left Expr `json:"left"` + Op tokens.Token `json:"op"` + Right Expr `json:"right"` + Type *Type +} + +func (n Binary) MarshalJSON() ([]byte, error) { + type Object Binary + return json.MarshalIndent(node{"binary", Object(n)}, "", " ") +} + +func (n Binary) Resolve() *Type { + if n.Type != nil { + return n.Type + } else { + return n.Left.Resolve() + } +} + +type Unary struct { + Op tokens.Token `json:"op"` + Right Expr `json:"expr"` +} + +func (n Unary) MarshalJSON() ([]byte, error) { + type Object Unary + return json.MarshalIndent(node{"unary", Object(n)}, "", " ") +} + +func (n Unary) Resolve() *Type { + if n.Op.Id == tokens.TestE { + return &TypeBool + } + return n.Right.Resolve() +} + +type Number int64 + +func (n Number) String() string { + return fmt.Sprintf("%d", n) +} + +func (n Number) Resolve() *Type { + return &TypeNumber +} + +type Literal struct { + Value tokens.Token `json:"value"` + Numval Number `json:"numval,omitempty"` + Type Type `json:"-"` +} + +func (n Literal) MarshalJSON() ([]byte, error) { + type Object Literal + return json.MarshalIndent(node{"literal", Object(n)}, "", " ") +} + +func (n Literal) Resolve() *Type { + return &n.Type +} + +type Group struct { + Expr Expr `json:"expr"` +} + +func (n Group) MarshalJSON() ([]byte, error) { + type Object Group + return json.MarshalIndent(node{"group", Object(n)}, "", " ") +} + +func (n Group) Resolve() *Type { + return n.Expr.Resolve() +} + +type Block struct { + Stmts []Expr `json:"stmts"` +} + +func (n Block) MarshalJSON() ([]byte, error) { + type Object Block + return json.MarshalIndent(node{"block", Object(n)}, "", " ") +} + +func (n Block) Resolve() *Type { + // not an expression + return &TypeNone +} + +type For struct { + Cond Expr `json:"cond,omitempty"` + Stmts Expr `json:"stmts"` +} + +func (n For) MarshalJSON() ([]byte, error) { + type Object For + return json.MarshalIndent(node{"for", Object(n)}, "", " ") +} + +func (n For) Resolve() *Type { + // not an expression + return &TypeNone +} + +type ForIn struct { + Name tokens.Token `json:"ident"` + Expr Expr `json:"expr"` + Stmts Expr `json:"stmts"` +} + +func (n ForIn) MarshalJSON() ([]byte, error) { + type Object ForIn + return json.MarshalIndent(node{"forin", Object(n)}, "", " ") +} + +func (n ForIn) Resolve() *Type { + // not an expression + return &TypeNone +} + +type IfElse struct { + Cond Expr `json:"cond"` + True Expr `json:"true"` + False Expr `json:"false"` +} + +func (n IfElse) MarshalJSON() ([]byte, error) { + type Object IfElse + return json.MarshalIndent(node{"ifelse", Object(n)}, "", " ") +} + +func (n IfElse) Resolve() *Type { + // not an expression + return &TypeNone +} + +type Call struct { + Callee Expr `json:"callee"` + Loc tokens.Location `json:"-"` + Args []Expr `json:"args"` +} + +func (n Call) MarshalJSON() ([]byte, error) { + type Object Call + return json.MarshalIndent(node{"call", Object(n)}, "", " ") +} + +func (n Call) Resolve() *Type { + typ := n.Callee.Resolve().Ret + if typ == nil { + typ = &TypeNone + } + return typ +} + +type Struct struct { + Name tokens.Token `json:"ident"` + Body Block `json:"body"` +} + +func (n Struct) MarshalJSON() ([]byte, error) { + type Object Struct + return json.MarshalIndent(node{"struct", Object(n)}, "", " ") +} + +func (n Struct) Resolve() *Type { + return NewStructType(n.Name) +} + +type GetExpr struct { + Object Expr `json:"object"` + Expr Expr `json:"expr"` +} + +func (n GetExpr) MarshalJSON() ([]byte, error) { + type Object GetExpr + return json.MarshalIndent(node{"get", Object(n)}, "", " ") +} + +func (n GetExpr) Resolve() *Type { + // this is likely to be wrong + return n.Expr.Resolve() +} + +type Func struct { + Name tokens.Token `json:"ident"` + Params []Var `json:"params"` + Ret *Type `json:"ret"` + Body Block `json:"body"` +} + +func (n Func) MarshalJSON() ([]byte, error) { + type Object Func + return json.MarshalIndent(node{"func", Object(n)}, "", " ") +} + +func (n Func) Resolve() *Type { + if n.Ret == nil { + return &TypeNone + } else { + return n.Ret + } +} + +type Return struct { + Value Expr `json:"expr"` + Loc tokens.Location `json:"-"` + Error bool `json:"error"` +} + +func (n Return) MarshalJSON() ([]byte, error) { + type Object Return + return json.MarshalIndent(node{"return", Object(n)}, "", " ") +} + +func (n Return) Resolve() *Type { + return n.Value.Resolve() +} + +type Continue struct { + Loc tokens.Location `json:"-"` +} + +func (n Continue) MarshalJSON() ([]byte, error) { + type Object Continue + return json.MarshalIndent(node{"continue", Object(n)}, "", " ") +} + +func (n Continue) Resolve() *Type { + return &TypeNone +} + +type Break struct { + Loc tokens.Location `json:"-"` +} + +func (n Break) MarshalJSON() ([]byte, error) { + type Object Break + return json.MarshalIndent(node{"break", Object(n)}, "", " ") +} + +func (n Break) Resolve() *Type { + return &TypeNone +} diff --git a/cmd/micro/micro.go b/cmd/micro/micro.go new file mode 100644 index 0000000..3e5c9e6 --- /dev/null +++ b/cmd/micro/micro.go @@ -0,0 +1,202 @@ +package main + +import ( + "bufio" + "encoding/json" + "errors" + "flag" + "fmt" + "os" + "path" + "strings" + "time" + + "usebox.net/lang/interpreter" + "usebox.net/lang/parser" + "usebox.net/lang/tokenizer" +) + +const name = "micro" + +var Version = "" + +func perror(err error) { + fmt.Fprintln(os.Stderr, err) + + first := err + for { + perr := err + err = errors.Unwrap(err) + if err == nil { + if perr.Error() != first.Error() { + fmt.Fprintf(os.Stderr, "%s\n", perr) + } + break + } + } +} + +func fatal(errno int, err error) { + perror(err) + os.Exit(errno) +} + +func fatals(typ string, errno int, err error) { + fmt.Fprintf(os.Stderr, "%s: %s\n", name, typ) + fatal(errno, err) +} + +func repl() { + scanner := bufio.NewScanner(os.Stdin) + p := parser.NewParser(interpreter.BuiltInTypes()) + i := interpreter.NewInterpreter() + + fmt.Printf("Welcome to %s %s\nType in expressions for evaluation, or :q to exit.\n\n", strings.Title(name), Version) + + repl := 0 + for { + fmt.Printf("%s> ", name) + var line string + multiline := false + for { + if multiline { + fmt.Print(" | ") + line += " " + } + if !scanner.Scan() { + fmt.Println() + return + } + r := scanner.Text() + if r == ":q" { + return + } + line += r + + if !multiline { + if r == "" { + continue + } + if r[len(r)-1] != ';' { + multiline = true + continue + } + break + } + if r == "" { + break + } + } + + in := strings.NewReader(line) + repl++ + t := tokenizer.NewTokenizer(fmt.Sprintf("in%d", repl), in) + ts, err := t.Scan() + if err != nil { + perror(err) + continue + } + + tree, err := p.Parse(ts) + if err != nil { + perror(err) + continue + } + + for _, expr := range tree { + v, err := i.Interp(expr) + if err != nil { + perror(err) + break + } + if v != nil { + fmt.Println(v) + } + } + } +} + +func usage() { + me := path.Base(os.Args[0]) + fmt.Fprintf(os.Stderr, + "Usage: %s [options] ... [file]\nOptions:\n", me) + flag.PrintDefaults() + fmt.Fprintln(os.Stderr) +} + +func main() { + fVersion := flag.Bool("V", false, "display version and exit") + fParse := flag.Bool("parse", false, "parse reporting any errors and exit") + fDebugTokens := flag.Bool("tokens", false, "dump to stderr tokens and exit") + fDebugAst := flag.Bool("ast", false, "dump to stderr AST and exit") + fTime := flag.Bool("time", false, "measure execution time") + flag.Usage = usage + flag.Parse() + + if *fVersion { + fmt.Printf("%s %s\n", name, Version) + os.Exit(0) + } + + var input *os.File + var filename = flag.Arg(0) + + if filename == "" { + repl() + os.Exit(0) + } else { + var err error + input, err = os.Open(filename) + if err != nil { + fatals("error opening input file", 1, err) + } + } + t := tokenizer.NewTokenizer(filename, input) + ts, err := t.Scan() + if err != nil { + fatals("parsing error", 10, err) + } + + if *fDebugTokens { + out, err := json.MarshalIndent(ts, "", " ") + if err != nil { + fatals("encoding error", 11, err) + } + fmt.Fprintf(os.Stderr, "%s\n", string(out)) + os.Exit(0) + } + + p := parser.NewParser(interpreter.BuiltInTypes()) + tree, err := p.Parse(ts) + if *fParse { + if err != nil { + perror(err) + os.Exit(1) + } + os.Exit(0) + } + if err != nil { + fatal(20, err) + } + + if *fDebugAst { + out, err := json.MarshalIndent(tree, "", " ") + if err != nil { + fatals("encoding error", 21, err) + } + fmt.Fprintf(os.Stderr, "%s\n", string(out)) + os.Exit(0) + } + + start := time.Now() + i := interpreter.NewInterpreter() + for _, expr := range tree { + if _, err := i.Interp(expr); err != nil { + fatal(31, err) + } + } + + if *fTime { + fmt.Fprintf(os.Stderr, "elapsed: %f\n", time.Since(start).Seconds()) + } +} diff --git a/docs/tour.md b/docs/tour.md new file mode 100644 index 0000000..27c7e14 --- /dev/null +++ b/docs/tour.md @@ -0,0 +1,488 @@ +# A tour of Micro + +Micro is a small statically typed toy programming language. + +## Syntax + +Micro statements are: + +* Variable declarations and expressions, delimited by semicolons. +* Function and structure definitions. + +### Comments + +Only line comments, starting with `//` and ending at the end of the line. + +```micro +// This is a comment +``` + +### Reserved words + +These are the reserved words in micro: + +> var const def func bool true false return if else for in break continue + +See also the built-in functions. + +### Identifiers + +Identifiers can be any string starting with a later (upper or lower case) or underscore (`_`), followed by the same type of character or a numeric value. + +An identifier can't be a reserved word. + +### Blocks and scope + +Micro has series of statements in blocks delimited with `{` and `}`, and a block defines a lexical scope. + +A variable declaration can shadow another from a parent scope. + +```micro +var a number = 1; +println(a); + +{ + // block, new scope + var a string = "one"; + println(a); +} + +println(a); + +// output: +// 1 +// one +// 1 +``` + +### Operators + +Micro uses the same operators and almost the same operator precedence as C, from more to less priority: + +```micro +? ! - ~ +/ * % +& +- + +<< >> | ^ +> >= < <= +|| && +!= == += +``` + +Arithmetic operators only apply to integers. + +Comparison operators only apply to integers, with the exception of `==` and `!=` that also apply to other types (if both operands are of the same type). + +Logic operators apply to booleans and to the following: + +* An empty string is false, true otherwise. +* A numeric value of 0 is false, true otherwise. +* A function reference is always true. + +The result of these operators is a boolean and they are lazy operators (e.g. in `a && b`, `b` won't be evaluated if `a` is false). + +## Values + +| Type | Keyword | Sample | +| --- | --- | --- | +| Boolean | `bool` | `true; false; ` | +| Number | `number` (int64) | `123;` | +| String | `string` | `"example";` | + +There is also a "none" type referred when a function doesn't return a value. It is not a available to use but it will appear in error reporting. + +For example, the interpreter running this code: +```micro +def fn() { } + +var a number = fn(); +// error: type mismatch expected number found none +``` + +See functions for more information about the "func" type. + +Number supports the following formats: + + * 0 decimal + * 0x00 hexadecimal + * 0b00 binary + * 'x' for "ASCII character" + +## Variables + +Variables are declared with `var`. + +If they are not initialized, a default value is assigned; with the exception of functions, that can't be used until they have a value (or there will be a runtime error). + +```micro +var a number = 123; +var b number; // initialises default to 0 +var c string; // empty string +var d bool; // false + +// e was not declared: error +e = 123; + +var fn func (); +fn(); +// error: value is not callable +``` + +### Constants + +Constants are immutable values. + +Constants can be literals or expressions that get evaluated at compilation time. + +```micro +const K number = 10; + +var a number = K; + +K = 0; +// error: value cannot be assigned to constant K +``` + +## Conditionals and flow control + +### If-else + +```micro +var a number; + +if a > 0 { + println("it is true"); +} else { + println("it is false"); +} + +// output: +// it is false +``` + +### For and for-in + +The block will loop while the expression evaluates to true. + +```micro +var i number = 4; + +for i > 0 { + println(i); + i = i - 1; +} + +// output: +// 4 +// 3 +// 2 +// 1 +``` + +Loop flow can be altered with: + +- `continue` to jump to next iteration. +- `break` to exit the loop. + +The expression is optional (infinite loop). + +```micro +var i number = 4; + +for { + println(i); + i = i - 1; + if i == 0 { + break; + } +} +``` + +`for ... in` can be used to Iterate over arrays. + +```micro +var arr [3]string = ["one", "two", "three"]; +for i in arr { + // i is of type string + println(i); +} +// output: +// one +// two +// three +``` + +## Functions + +Functions can be defined using `def` and they are assigned a "func" type describing parameters and return type. + +Recursion is supported and recursive tail calls will be optimized. + +```micro +// recursive factorial +// (the tail call will be optimized) +def fact(n number, acc number) number { + if n == 1 { + return acc; + } else { + return fact(n - 1, acc * n); + } +} +// type of fact is: func (number, number) number + +println(fact(20, 1)); +// output: +// 2432902008176640000 + +def fn(a number, b number) { + // implicit return with no value + // it is an error if a value is expected +} +``` + +Functions are higher-order. + +```micro +// b is a function that takes one number and returns a number +def fn(a number, b func (number) number) number { + return b(a); +} +``` + +Closures are supported. + +```micro +def makeCounter() func (number) number { + // defaults to 0 + var c number; + def count() number { + c = c + 1; + return c; + } + return count; +} + +var c func = makeCounter(); + +println(c()); // 1 +println(c()); // 2 +println(c()); // 3 +``` + +Arrays and structures are always passed as reference. + +```micro +var arr [5]number; + +def fn(a [5]number) { + a[0] = 10; +} +fn(arr); + +arr[0]; // 10 +``` + +Local arrays and "struct" can't be returned from a function. + +```micro +def fn() [5]number { + var local [5]number; + return local; +} +// error: returning a local value of type array [5]number +``` + +But references to variables in scope are allowed. + +```micro +var g [5]number; +def fn() [5]number { + // local is a reference to a non-local variable + var local [5]number = g; + return local; +} +// error: returning a local value of type array [5]number +``` + +### Returning error + +For cases when the return value can't be used to inform of an error, a function can tag a return value as "error" using `!?`. That tag can be tested with the `?` unary operator. + +Even if a return value is tagged, it must be a valid return value for the function definition. + +```micro +def safeDiv(a number, b number) number { + // a / 0 would result on a runtime error! + if b == 0 { + return !? 0; + } + return a / b; +} + +var result number; + +// checks if the error tag is set while keeping the return value +if ? (result = safeDiv(1, 0)) { + println("safeDiv returned error"); +} + +// it can also be used with functions returning no values +def fn() { + return !?; +} + +if ? fn() { + println("fn always fails!"); +} +``` + +`?` (test error) operator resets the error flag before evaluating the right expression. + +```micro +// our previous "always fails" function +fn(); +// error is not set +if ? true { + println("Not run"); +} +``` + +## Arrays + +Arrays are zero based and are supported for all types. + +Array size is a numeric literal or a constant expression (must be known at compilation type). + +Arrays can be initialised to literals with: + +* `[` and `]` providing a list of values. +* A literal string using double quotes for arrays of unit8; the array size must be at least the length of the string plus 1 (a zero terminating the string). + +```micro +// array of 5 numbers; the size is only needed +// when no initializing values are provided +var arr [5]number; + +// initialized to specific values +var arr2 [5]bool = [true, false, true, false, true]; + +arr[0]; // 0 +arr2[0]; // true + +// iterate over members +for i in arr { + println(i); +} + +len(arr); // 5 + +var i number; +// arrays indexes are zero based +for i < len(arr) { + println(arr[i]); + i = i + 1; +} + +``` + +Arrays can be constants. + +```micro +const arr [10]number; + +arr[0] = 10; +// error: can't modify a constant variable + +// but can change references +const ref [10]number = arr; +const arr2 [10]number; + +ref = arr2; +``` + +Arrays are passed by reference to functions. + +## Structures + +Structures allow grouping data and functions. + +In a structure definition only variable declarations, and definitions of functions and other structures are allowed. + +```micro +// declare struct A +def A { + var n number; + + def fn() number { + // n is in scope + n = n + 1; + return n; + } +} + +// memory is allocated for obj +var obj A; + +// obj2 is a reference to obj +var obj2 A = obj; + +prinln(obj2.n); +// output: 0 + +println(obj.fn()); +// output: 1 + +prinln(obj2.n); +// output: 1 + +def B { var n bool; } + +var b B = obj; +// error: type mismatch: expected struct B found struct A + +// recursive structures are not supported (micro doesn't have pointers) +def B { + var next B; + // error: recursive definition of struct B +} + +def C { + println("error"); + // error: expected definition in a struct block +} +``` + +## Directives + +TODO + +Include a file in current source and it will interpreted / compiled. + +```micro +include "file.micro" +``` + +Include a binary file as an expression list to initialize an array. + +```micro +var arr [64]number = incbin "file.micro"; +``` + +## Interpreter built-in functions + +These are available on the interpreter. + +| Function | Description | Example | +| --- | --- | --- | +| `len` | returns the size in elements of a variable of type array | `len(arr);` | +| `panic` | ends execution with a message and a runtime error | `panic("message");` | +| `println` | writes to standard output a variable list of expressions, followed by an end of line; returns number of bytes written | `println("value: ", a);` | +| `str` | used with arrays of uint8 to convert them as a printable string | `println(str(arr));` | + + +## To do + +See [TODO](todo.md) for features that are planned but not yet implemented. + diff --git a/errors/errors.go b/errors/errors.go new file mode 100644 index 0000000..a8d1eb0 --- /dev/null +++ b/errors/errors.go @@ -0,0 +1,106 @@ +package errors + +import ( + "fmt" + "strings" + + "usebox.net/lang/tokens" +) + +type ErrCode int + +const ( + Unimplemented ErrCode = iota + SyntaxError + IncompleteEscape + InvalidChar + ExpectedIdent + ExpectedType + AlreadyDeclared + Undefined + ExpectedSemicolon + ExpectedComma + ExpectedExpr + ExpectedLParen + ExpectedRParen + ExpectedLBrace + ExpectedRBrace + ExpectedIn + ExpectedRBracket + ExpectedAssign + ExpectedDefinition + AssignConst + FailedNumeric + InvalidOperation + InvalidValue + InvalidLength + InvalidIndex + InvalidType + TypeMismatch + InvalidTarget + UndefinedIdent + NotCallable + NotIndexable + NotStruct + InvalidNumberArgs + CallError + NoReturn + TailCallError + RecursiveStruct + Unexpected +) + +type Error struct { + Code ErrCode + Message string + Err error +} + +func (e Error) Error() string { + return e.Message +} + +func (e Error) Unwrap() error { + return e.Err +} + +func formatError(loc tokens.Location, msg ...string) string { + return fmt.Sprintf("%s:%d col %d error: %s", loc.File, loc.Line, loc.Column, strings.Join(msg, " ")) +} + +func NewError(code ErrCode, loc tokens.Location, msg ...string) error { + return Error{ + Code: code, + Message: formatError(loc, msg...), + } +} + +func NewErrorWrap(code ErrCode, loc tokens.Location, err error, msg ...string) error { + return Error{ + Code: code, + Message: formatError(loc, msg...), + Err: err, + } +} + +type ErrorList struct { + Errors []error +} + +func (el ErrorList) Error() string { + var b strings.Builder + var nerrors = len(el.Errors) + + for i := 0; i < nerrors; i++ { + fmt.Fprint(&b, el.Errors[i]) + if i == 10 { + fmt.Fprintf(&b, "\n...") + break + } + if i < nerrors-1 { + fmt.Fprintln(&b) + } + } + + return b.String() +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..72a6a14 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module usebox.net/lang + +go 1.18 diff --git a/grammar.md b/grammar.md new file mode 100644 index 0000000..b51adb4 --- /dev/null +++ b/grammar.md @@ -0,0 +1,65 @@ + program = statement* EOF ; + + statement = expr ";" + | definition + | block + | "if" expr block ( "else" block )? + | "for" ( expr? | IDENTIFIER "in" expr ) block + | "return" "!?"? expr? ";" + | "continue" ";" + | "break" ";" + | declaration ; + + definition = "def" IDENTIFIER struct_block + | "def" IDENTIFIER "(" params? ")" TYPE? block + + params = param ( "," param )* ; + param = IDENTIFIER TYPE ; + + block = "{" statement* "}" ; + + struct_block = "{" ( definitions | declaration )+ "}" ; + + expr = assignment ; + + declaration = "const" IDENTIFIER TYPE = expr_list ";" + | "var" IDENTIFIER TYPE ( "=" expr_list )? ";" ; + + expr_list = expr + | "[" expr ( "," expr )? "]" + + assignment = INDENTIFIER ( "[" expr "]" )* "=" expr + | logic_or ; + + logic_or = logic_and ( "||" logic_and )* ; + logic_and = equality ( "&&" equality )* ; + equality = comparison ( ( "!=" | "==" ) comparison )* ; + comparison = bitterm ( ( ">" | ">=" | "<" | "<=" ) bitterm )* ; + bitterm = term ( ( "<<" | ">>" | "|" | "^" ) term )* ; + term = bitfactor ( ( "-" | "+" ) bitfactor )* ; + bitfactor = factor ( "&" factor )* ; + factor = unary ( ( "/" | "*" | "%" ) unary )* ; + + unary = ( "?" | "!" | "-" | "~" ) unary | call ; + call = primary ( "(" arguments? ")" | "." IDENTIFIER )* + | primary ( "[" expr "]" )+ ; + primary = NUMBER | STRING | FUNC | "true" | "false" + | "(" expr ")" + | IDENTIFIER ; + + arguments = expr ( "," expr )* ; + + NUMBER = ( "0x" HEX_DIGIT+ | "0b" BIN_DIGIT+ | DIGIT+ ) + | "'" CHAR "'"; + STRING = "\"" ( "\\\"" | )* "\"" ; + FUNC = IDENTIFIER ; + IDENTIFIER = ALPHA ( ALPHA | DIGIT )* ; + TYPE = NON_ARRAY_TYPE | ARRAY_TYPE + NON_ARRAY_TYPE = "number" | "bool" | "string" | FUNC_TYPE + FUNC_TYPE = "func" "(" ( TYPE ( "," TYPE )* )? ")" TYPE? + ARRAY_TYPE = "[" expr "]" NON_ARRAY_TYPE + ALPHA = "a" ... "z" | "A" ... "Z" | "_" ; + ALPHA_DIGIT = "0" ... "9" | "a" ... "f" | "A" ... "F" ; + BIN_DIGIT = "0" | "1" ; + DIGIT = "0" ... "9" ; + diff --git a/interpreter/callable.go b/interpreter/callable.go new file mode 100644 index 0000000..e9dc6aa --- /dev/null +++ b/interpreter/callable.go @@ -0,0 +1,227 @@ +package interpreter + +import ( + "fmt" + + "usebox.net/lang/ast" + "usebox.net/lang/errors" + "usebox.net/lang/tokens" +) + +type Callable interface { + Name() string + String() string + Call(interp *Interpreter, args []any, loc tokens.Location) (any, error) + Type() *ast.Type + Params() []ast.Type + Ret() *ast.Type +} + +var builtInFuncs = map[string]Callable{ + "len": builtInLen{}, + "println": builtInPrintln{}, + "panic": builtInPanic{}, +} + +func BuiltInTypes() map[string]ast.Type { + types := map[string]ast.Type{} + for k, v := range builtInFuncs { + types[k] = *v.Type() + } + return types +} + +type builtInLen struct{} + +func (n builtInLen) Name() string { + return "'len'" +} + +func (n builtInLen) Type() *ast.Type { + return ast.NewFuncType(tokens.Token{}, n.Params(), n.Ret()) +} + +func (n builtInLen) String() string { + return n.Type().String() +} + +func (n builtInLen) Params() []ast.Type { + // won't be arity or type checked + return []ast.Type{ast.TypeArray} +} + +func (n builtInLen) Ret() *ast.Type { + return &ast.TypeNumber +} + +func (n builtInLen) Call(interp *Interpreter, args []any, loc tokens.Location) (any, error) { + vals, ok := args[0].([]any) + if !ok { + // shouldn't happen + return nil, errors.NewError(errors.Unexpected, loc, "type mismatch in call to 'len'") + } + // return + panic(&PanicJump{typ: PanicJumpReturn, value: ast.Number(len(vals))}) +} + +type builtInPrintln struct{} + +func (n builtInPrintln) Name() string { + return "'println'" +} + +func (n builtInPrintln) Type() *ast.Type { + return ast.NewFuncType(tokens.Token{}, n.Params(), n.Ret()) +} + +func (n builtInPrintln) String() string { + return n.Type().String() +} + +func (n builtInPrintln) Params() []ast.Type { + // won't be arity or type checked + return nil +} + +func (n builtInPrintln) Ret() *ast.Type { + return &ast.TypeNumber +} + +func (n builtInPrintln) Call(interp *Interpreter, args []any, loc tokens.Location) (any, error) { + var count int + + for i := range args { + if args[i] == nil { + continue + } + written, err := fmt.Print(args[i]) + if err != nil { + return nil, err + } + count += written + } + fmt.Println() + count++ + + // return + panic(&PanicJump{typ: PanicJumpReturn, value: ast.Number(count)}) +} + +type builtInPanic struct{} + +func (n builtInPanic) Name() string { + return "'panic'" +} + +func (n builtInPanic) Type() *ast.Type { + return ast.NewFuncType(tokens.Token{}, n.Params(), n.Ret()) +} + +func (n builtInPanic) String() string { + return n.Type().String() +} + +func (n builtInPanic) Params() []ast.Type { + return []ast.Type{ast.TypeString} +} + +func (n builtInPanic) Ret() *ast.Type { + // no return (returns none) + return nil +} + +func (n builtInPanic) Call(interp *Interpreter, args []any, loc tokens.Location) (any, error) { + return nil, fmt.Errorf("[%s] panic: %s", loc, args[0]) +} + +type Function struct { + fun ast.Func + closure *Env +} + +func (f Function) Name() string { + return f.fun.Name.String() +} + +func (f Function) Type() *ast.Type { + return ast.NewFuncType(f.fun.Name, f.Params(), f.Ret()) +} + +func (f Function) String() string { + return f.Type().String() +} + +func (f Function) Params() []ast.Type { + params := make([]ast.Type, 0, 1) + for _, p := range f.fun.Params { + params = append(params, p.Type) + } + return params +} + +func (f Function) Ret() *ast.Type { + return f.fun.Ret +} + +func (f Function) Call(interp *Interpreter, args []any, loc tokens.Location) (result any, err error) { + pEnv := interp.env + interp.env = NewEnv(f.closure) + defer func() { + interp.env = pEnv + }() + + for n, v := range f.fun.Params { + interp.env = interp.env.Set(v.Name.Value, Var{Value: args[n], Loc: v.Name.Loc}) + } + + // tail call optimization + for { + // wrap the evaluation in a function + var tailCall *PanicJump + tailCall, result, err = func() (tailCall *PanicJump, result any, err error) { + // handle tail call + // will call this function again without setting up a new call frame + defer func() { + if r := recover(); r != nil { + if val, ok := r.(*PanicJump); ok && val.typ == PanicJumpTailCall { + tailCall = val + return + } + panic(r) + } + }() + result, err = interp.evaluate(f.fun.Body) + return nil, result, err + }() + if tailCall == nil { + break + } + + // XXX: can we optimize this? + // if the callee can be a variable expression, we probably can't + call := tailCall.value.(ast.Call) + callee, err := interp.evaluate(call.Callee) + if err != nil { + return nil, err + } + if fun, ok := callee.(Callable); !ok || fun.Name() != f.Name() { + // can't be optimized + val, err := interp.evaluate(call) + if err != nil { + return nil, err + } + panic(&PanicJump{typ: PanicJumpReturn, value: val, loc: tailCall.loc}) + } + + args, err := interp.evalArgs(f.Name(), call.Loc, f.Params(), call.Args) + if err != nil { + return nil, err + } + + for n, v := range f.fun.Params { + interp.env.Update(v.Name.Value, args[n]) + } + } + + return result, err +} diff --git a/interpreter/interpreter.go b/interpreter/interpreter.go new file mode 100644 index 0000000..8d5b7a8 --- /dev/null +++ b/interpreter/interpreter.go @@ -0,0 +1,801 @@ +package interpreter + +import ( + "fmt" + "strings" + + "usebox.net/lang/ast" + "usebox.net/lang/errors" + "usebox.net/lang/tokens" +) + +type Var struct { + Value any + Loc tokens.Location +} + +type Env struct { + env map[string]Var + parent *Env +} + +func NewEnv(parent *Env) *Env { + return &Env{make(map[string]Var), parent} +} + +func (e *Env) Get(key string, local bool) (Var, bool) { + p := e + for { + if v, ok := p.env[key]; ok { + return v, true + } + + if !local && p.parent != nil { + p = p.parent + continue + } + + return Var{}, false + } +} + +func (e *Env) GetArray(key string, local bool) ([]any, bool) { + v, ok := e.Get(key, local) + if !ok { + return nil, false + } + arr, ok := v.Value.([]any) + return arr, ok +} + +// Set will create a new enviroment if this new value shadows an existing one. +func (e *Env) Set(key string, value Var) *Env { + p := e + if _, ok := e.Get(key, false); ok { + p = NewEnv(e) + } + p.env[key] = value + return p +} + +func (e *Env) Update(key string, value any) error { + p := e + for { + if v, ok := p.env[key]; ok { + v.Value = value + p.env[key] = v + return nil + } + + if p.parent != nil { + p = p.parent + continue + } + + // should not happen + return fmt.Errorf("variable not found '%s'", key) + } +} + +func (e *Env) string(pad string) string { + b := strings.Builder{} + + b.WriteString(fmt.Sprintf("%senv {\n", pad)) + for k, v := range e.env { + b.WriteString(fmt.Sprintf("%s%s = %v\n", pad+" ", k, v)) + } + if e.parent != nil { + b.WriteString(e.parent.string(pad + " ")) + } + b.WriteString(fmt.Sprintf("%s}", pad)) + + if pad != "" { + b.WriteRune('\n') + } + + return b.String() +} + +func (e Env) String() string { + return e.string("") +} + +func (e *Env) Copy() *Env { + ne := NewEnv(e.parent) + for k, v := range e.env { + val := Var{Value: v.Value, Loc: v.Loc} + // FIXME: structs + switch v := val.Value.(type) { + // update the closures to the new environment + case Function: + v.closure = ne + val.Value = v + // copy the array + case []any: + arr := make([]any, len(v)) + copy(arr, v) + val.Value = arr + } + ne.env[k] = val + } + + return ne +} + +type Struct struct { + name string + env *Env + inst bool +} + +func (s Struct) String() string { + if s.inst { + return fmt.Sprintf("", s.name) + } else { + return fmt.Sprintf("struct %s", s.name) + } +} + +func (s Struct) Instance() Struct { + return Struct{name: s.name, env: s.env.Copy(), inst: true} +} + +type PanicJumpType int + +const ( + PanicJumpReturn PanicJumpType = iota + PanicJumpTailCall + PanicJumpContinue + PanicJumpBreak +) + +type PanicJump struct { + typ PanicJumpType + value any + loc tokens.Location +} + +type Interpreter struct { + env *Env + // location of the last node evaluated + last tokens.Location + fError bool +} + +func NewInterpreter() Interpreter { + global := NewEnv(nil) + for k, v := range builtInFuncs { + // not shadowing any value + global.Set(k, Var{Value: v}) + } + return Interpreter{env: global} +} + +func (i *Interpreter) Interp(expr any) (val any, err error) { + val, err = i.evaluate(expr) + if err != nil { + return nil, err + } + return val, nil +} + +func (i *Interpreter) isTrue(expr any) bool { + switch v := expr.(type) { + case ast.Number: + return v != 0 + case string: + return v != "" + case bool: + return v + case Function, Callable: + return true + default: + return false + } +} + +func (i *Interpreter) unaryNumeric(op tokens.Token, right any) (ast.Number, error) { + if vr, ok := right.(ast.Number); ok { + return vr, nil + } + + // shouldn't happen + return 0, errors.NewError(errors.Unexpected, op.Loc, "invalid operand") +} + +func (i *Interpreter) binaryNumeric(left any, op tokens.Token, right any) (ast.Number, ast.Number, error) { + vl, okl := left.(ast.Number) + vr, okr := right.(ast.Number) + if okl && okr { + return vl, vr, nil + } + + // shouldn't happen + return 0, 0, errors.NewError(errors.Unexpected, op.Loc, "invalid operands") +} + +func (i *Interpreter) evaluate(expr any) (any, error) { + switch v := expr.(type) { + case ast.Var: + i.last = v.Name.Loc + return i.declare(v) + case ast.Variable: + i.last = v.Name.Loc + return i.variable(v) + case ast.Assign: + return i.assignment(v) + case ast.Binary: + i.last = v.Op.Loc + return i.binaryExpr(v) + case ast.Unary: + i.last = v.Op.Loc + return i.unaryExpr(v) + case ast.Literal: + i.last = v.Value.Loc + return i.literalExpr(v) + case ast.Group: + return i.groupExpr(v) + case ast.Block: + return i.block(v) + case ast.IfElse: + return i.ifElse(v) + case ast.For: + return i.forStmt(v) + case ast.ForIn: + return i.forInStmt(v) + case ast.Call: + i.last = v.Loc + return i.call(v) + case ast.Func: + i.last = v.Name.Loc + return i.fun(v) + case ast.Return: + return i.returnStmt(v) + case ast.Continue: + return i.continueStmt(v) + case ast.Break: + return i.breakStmt(v) + case ast.ExprList: + return i.exprList(v) + case ast.Struct: + return i.strct(v) + case ast.GetExpr: + return i.getExpr(v) + default: + // XXX + return nil, errors.Error{ + Code: errors.Unimplemented, + Message: fmt.Sprintf("evaluation unimplemented: %s", v), + } + } +} + +func (i *Interpreter) defVal(typ ast.Type) any { + switch typ.Value.Id { + case tokens.TNumber: + return ast.Number(0) + case tokens.TBool: + return false + case tokens.TString: + return "" + case tokens.TArray: + arr := make([]any, *typ.Len) + for n := range arr { + arr[n] = i.defVal(*typ.Ret) + } + return arr + case tokens.TStruct: + val, _ := i.env.Get(typ.Value.Value, false) + strct := val.Value.(Struct) + return strct.Instance() + default: + return nil + } +} + +func (i *Interpreter) declare(v ast.Var) (any, error) { + var initv any + var err error + + if v.Type.Value.Id == tokens.TArray && *v.Type.Len <= 0 { + return nil, errors.NewError(errors.InvalidLength, v.Name.Loc, "invalid array length") + } + + if v.Initv != nil { + initv, err = i.evaluate(v.Initv) + if err != nil { + return 0, err + } + + i.env = i.env.Set(v.Name.Value, Var{initv, v.Name.Loc}) + return initv, nil + } else { + // defaults + initv = i.defVal(v.Type) + value := Var{initv, v.Name.Loc} + i.env = i.env.Set(v.Name.Value, value) + return initv, nil + } +} + +func (i *Interpreter) variable(v ast.Variable) (any, error) { + if val, ok := i.env.Get(v.Name.Value, false); ok { + value := val.Value + if v.Index != nil { + index, err := i.evaluate(v.Index) + if err != nil { + return nil, err + } + idx, ok := index.(ast.Number) + if !ok || idx < 0 || int(idx) >= *v.Type.Len { + return nil, errors.NewError(errors.InvalidIndex, v.Name.Loc, "invalid index for", v.Type.String()) + } + arr, ok := i.env.GetArray(v.Name.Value, false) + if !ok { + return nil, errors.NewError(errors.Unexpected, v.Name.Loc, "expected array") + } + value = arr[idx] + } + return value, nil + } + // shouldn't happen + return nil, errors.NewError(errors.Unexpected, v.Name.Loc, "undefined variable", v.String()) +} + +func (i *Interpreter) assignment(v ast.Assign) (any, error) { + env := i.env + vLeft := v.Left + + if getExpr, ok := v.Left.(ast.GetExpr); ok { + obj, err := i.evaluate(getExpr.Object) + if err != nil { + return nil, err + } + strct := obj.(Struct) + env = strct.env + vLeft = getExpr.Expr + } + + left, ok := vLeft.(ast.Variable) + if !ok { + return nil, errors.NewError(errors.Unexpected, v.Loc, "expected variable in assignation") + } + right, err := i.evaluate(v.Right) + if err != nil { + return nil, err + } + + if left.Type.Value.Id == tokens.TArray && v.Right.Resolve().Value.Id != tokens.TArray { + if left.Index == nil { + return nil, errors.NewError(errors.Unexpected, v.Loc, "expected index in assignation") + } + index, err := i.evaluate(left.Index) + if err != nil { + return nil, err + } + idx, ok := index.(ast.Number) + if !ok || idx < 0 || int(idx) >= *left.Type.Len { + return nil, errors.NewError(errors.InvalidIndex, v.Loc, "invalid index for", left.Type.String()) + } + arr, ok := i.env.GetArray(left.Name.Value, false) + if !ok { + return nil, errors.NewError(errors.Unexpected, v.Loc, "expected array") + } + arr[idx] = right + right = arr + } + + err = env.Update(left.Name.Value, right) + if err != nil { + return nil, errors.NewError(errors.InvalidValue, left.Name.Loc, err.Error()) + } + return right, nil +} + +func (i *Interpreter) binaryExpr(expr ast.Binary) (any, error) { + // first lazy operators + if expr.Op.Id == tokens.And || expr.Op.Id == tokens.Or { + left, err := i.evaluate(expr.Left) + if err != nil { + return nil, err + } + if expr.Op.Id == tokens.And { + if !i.isTrue(left) { + return false, nil + } + } else { + if i.isTrue(left) { + return true, nil + } + } + right, err := i.evaluate(expr.Right) + if err != nil { + return nil, err + } + return i.isTrue(right), nil + } + + left, err := i.evaluate(expr.Left) + if err != nil { + return nil, err + } + right, err := i.evaluate(expr.Right) + if err != nil { + return nil, err + } + + // equality compares two values as long as they are the same type + switch expr.Op.Id { + case tokens.Ne: + return right != left, nil + case tokens.Eq: + return right == left, nil + } + + vl, vr, err := i.binaryNumeric(left, expr.Op, right) + if err != nil { + return 0, err + } + + switch expr.Op.Id { + case tokens.Sub: + return vl - vr, nil + case tokens.Add: + return vl + vr, nil + case tokens.Mul: + return vl * vr, nil + case tokens.Div: + if vr == 0 { + return nil, errors.NewError(errors.InvalidOperation, expr.Op.Loc, "invalid operation: division by zero") + } + return vl / vr, nil + case tokens.Mod: + if vr == 0 { + return nil, errors.NewError(errors.InvalidOperation, expr.Op.Loc, "invalid operation: division by zero") + } + return vl % vr, nil + case tokens.BitAnd: + return vl & vr, nil + case tokens.BitShl: + return vl << vr, nil + case tokens.BitShr: + return vl >> vr, nil + case tokens.BitOr: + return vl | vr, nil + case tokens.BitXor: + return vl ^ vr, nil + case tokens.Lt: + return vl < vr, nil + case tokens.Le: + return vl <= vr, nil + case tokens.Gt: + return vl > vr, nil + case tokens.Ge: + return vl >= vr, nil + default: + return nil, errors.NewError(errors.Unimplemented, expr.Op.Loc, "unimplemented operator", expr.Op.Value) + } + +} + +func (i *Interpreter) unaryExpr(expr ast.Unary) (any, error) { + if expr.Op.Id == tokens.TestE { + i.fError = false + } + + right, err := i.evaluate(expr.Right) + if err != nil { + return nil, err + } + + switch expr.Op.Id { + case tokens.TestE: + return i.fError, nil + case tokens.Sub: + vr, err := i.unaryNumeric(expr.Op, right) + if err != nil { + return nil, err + } + return -vr, nil + case tokens.Neg: + vr, err := i.unaryNumeric(expr.Op, right) + if err != nil { + return nil, err + } + return ^vr, nil + case tokens.Not: + vr := i.isTrue(right) + return !vr, nil + default: + return nil, errors.NewError(errors.Unimplemented, expr.Op.Loc, "unimplemented operator", expr.Op.Value) + } +} + +func (i *Interpreter) literalExpr(expr ast.Literal) (any, error) { + switch expr.Value.Id { + case tokens.Number: + return expr.Numval, nil + case tokens.String: + return expr.Value.Value, nil + case tokens.True: + return true, nil + case tokens.False: + return false, nil + case tokens.None: + return nil, nil + default: + return nil, errors.NewError(errors.Unimplemented, expr.Value.Loc, "unimplemented type", expr.Value.String()) + } +} + +func (i *Interpreter) groupExpr(expr ast.Group) (any, error) { + return i.evaluate(expr.Expr) +} + +func (i *Interpreter) blockNoEnv(block ast.Block) (any, error) { + var v any + var err error + for _, expr := range block.Stmts { + v, err = i.evaluate(expr) + if err != nil { + return nil, err + } + } + return v, nil +} + +func (i *Interpreter) block(block ast.Block) (any, error) { + pEnv := i.env + i.env = NewEnv(pEnv) + defer func() { + i.env = pEnv + }() + + return i.blockNoEnv(block) +} + +func (i *Interpreter) ifElse(ifElse ast.IfElse) (any, error) { + cond, err := i.evaluate(ifElse.Cond) + if err != nil { + return nil, err + } + if i.isTrue(cond) { + return i.evaluate(ifElse.True) + } else { + return i.evaluate(ifElse.False) + } +} + +func (i *Interpreter) forEval(fcond func() (bool, error), stmts ast.Expr) (any, error) { + var last any + + for { + end, result, err := func() (end bool, result any, err error) { + // handle "interruptions" + defer func() { + if r := recover(); r != nil { + if val, ok := r.(*PanicJump); ok { + if val.typ == PanicJumpContinue { + end = false + return + } + if val.typ == PanicJumpBreak { + end = true + result = last + err = nil + return + } + } + panic(r) + } + }() + for { + if fcond != nil { + cond, err := fcond() + if err != nil { + return true, nil, err + } + if !cond { + return true, last, nil + } + } + + last, err = i.evaluate(stmts) + if err != nil { + return true, nil, err + } + } + }() + if end { + return result, err + } + } +} + +func (i *Interpreter) forStmt(forStmt ast.For) (any, error) { + if forStmt.Cond == nil { + return i.forEval(nil, forStmt.Stmts) + } else { + return i.forEval(func() (bool, error) { + cond, err := i.evaluate(forStmt.Cond) + if err != nil { + return false, err + } + return i.isTrue(cond), nil + }, forStmt.Stmts) + } +} + +func (i *Interpreter) forInStmt(forInStmt ast.ForIn) (any, error) { + expr, err := i.evaluate(forInStmt.Expr) + if err != nil { + return nil, err + } + + index := 0 + arr := expr.([]any) + + pEnv := i.env + i.env = NewEnv(pEnv) + defer func() { + i.env = pEnv + }() + i.env = i.env.Set(forInStmt.Name.Value, Var{Loc: forInStmt.Name.Loc}) + + return i.forEval(func() (bool, error) { + if index == len(arr) { + return false, nil + } + i.env.Update(forInStmt.Name.Value, arr[index]) + index++ + return true, nil + }, forInStmt.Stmts) +} + +func (i *Interpreter) evalArgs(name string, loc tokens.Location, params []ast.Type, args []ast.Expr) ([]any, error) { + vals := make([]any, 0, 16) + for _, a := range args { + arg, err := i.evaluate(a) + if err != nil { + return nil, err + } + vals = append(vals, arg) + } + + return vals, nil +} + +func (i *Interpreter) call(call ast.Call) (result any, err error) { + callee, err := i.evaluate(call.Callee) + if err != nil { + return nil, err + } + + fun, ok := callee.(Callable) + if !ok { + return nil, errors.NewError(errors.NotCallable, call.Loc, "value is not callable") + } + + args, err := i.evalArgs(fun.Name(), call.Loc, fun.Params(), call.Args) + if err != nil { + return nil, err + } + + // handle return via panic call + defer func() { + if r := recover(); r != nil { + if val, ok := r.(*PanicJump); ok && val.typ == PanicJumpReturn { + result = val.value + err = nil + } else { + // won't be handled here + panic(r) + } + } + }() + + _, err = fun.Call(i, args, call.Loc) + if err != nil { + e := errors.NewError(errors.CallError, call.Loc, "error calling", fun.Name()).(errors.Error) + e.Err = err + return nil, e + } + + // clear return if there's no return value + if fun.Ret() == nil { + return nil, nil + } + + return nil, errors.NewError(errors.NoReturn, i.last, "no return value in", fun.Name(), "expected", fun.Ret().String()) +} + +func (i *Interpreter) fun(v ast.Func) (any, error) { + callable := Function{fun: v, closure: i.env} + i.env = i.env.Set(v.Name.Value, Var{callable, v.Name.Loc}) + return nil, nil +} + +func (i *Interpreter) strct(v ast.Struct) (any, error) { + pEnv := i.env + sEnv := NewEnv(pEnv) + i.env = sEnv + defer func() { + i.env = pEnv + strct := Struct{name: v.Name.Value, env: sEnv} + i.env = i.env.Set(v.Name.Value, Var{Value: strct, Loc: v.Name.Loc}) + }() + + _, err := i.blockNoEnv(v.Body) + if err != nil { + return nil, err + } + + return nil, nil +} + +func (i *Interpreter) getExpr(v ast.GetExpr) (any, error) { + obj, err := i.evaluate(v.Object) + if err != nil { + return nil, err + } + + if val, ok := obj.(Struct); ok { + name := v.Expr.(ast.Variable) + if name.Index == nil { + expr, _ := val.env.Get(name.Name.Value, true) + return expr.Value, nil + } + + arr, _ := val.env.GetArray(name.Name.Value, true) + index, err := i.evaluate(name.Index) + if err != nil { + return nil, err + } + return arr[index.(ast.Number)], nil + } + + // shouldn't happen + return nil, errors.NewError(errors.Unimplemented, v.Resolve().Value.Loc, "unimplemented get-expr") +} + +func (i *Interpreter) returnStmt(v ast.Return) (any, error) { + var val any + + if v.Value != nil { + // could be a tail call we could optimize + if call, ok := v.Value.(ast.Call); ok { + i.fError = v.Error + panic(&PanicJump{typ: PanicJumpTailCall, value: call, loc: v.Loc}) + } + + var err error + val, err = i.evaluate(v.Value) + if err != nil { + return nil, err + } + } + + i.fError = v.Error + panic(&PanicJump{typ: PanicJumpReturn, value: val, loc: v.Loc}) +} + +func (i *Interpreter) continueStmt(v ast.Continue) (any, error) { + panic(&PanicJump{typ: PanicJumpContinue, loc: v.Loc}) +} + +func (i *Interpreter) breakStmt(v ast.Break) (any, error) { + panic(&PanicJump{typ: PanicJumpBreak, loc: v.Loc}) +} + +func (i *Interpreter) exprList(v ast.ExprList) (any, error) { + // XXX: should we set last? + vals := make([]any, len(v.Exprs)) + for n, expr := range v.Exprs { + val, err := i.evaluate(expr) + if err != nil { + return nil, err + } + vals[n] = val + } + return vals, nil +} diff --git a/interpreter/interpreter_test.go b/interpreter/interpreter_test.go new file mode 100644 index 0000000..d9c63ae --- /dev/null +++ b/interpreter/interpreter_test.go @@ -0,0 +1,754 @@ +package interpreter + +import ( + "fmt" + "strings" + "testing" + + "usebox.net/lang/ast" + "usebox.net/lang/errors" + "usebox.net/lang/parser" + "usebox.net/lang/tokenizer" +) + +func run(input string) (any, error) { + tr := tokenizer.NewTokenizer("-", strings.NewReader(input)) + toks, err := tr.Scan() + if err != nil { + return nil, err + } + p := parser.NewParser(BuiltInTypes()) + tree, err := p.Parse(toks) + if err != nil { + errs := err.(errors.ErrorList) + return nil, errs.Errors[0] + } + + i := NewInterpreter() + var val any + for _, expr := range tree { + if val, err = i.Interp(expr); err != nil { + return nil, err + } + } + return val, nil +} + +func expectNumber(t *testing.T, input string, n ast.Number) { + val, err := run(input) + if err != nil { + t.Errorf("Unexpected error %s (input: %s)", err, input) + } + i, ok := val.(ast.Number) + if !ok { + t.Errorf("Number expected, got %T instead (input: %s)", val, input) + } else if i != n { + t.Errorf("%d expected, got %d instead (input: %s)", n, i, input) + } +} + +func expectBool(t *testing.T, input string, b bool) { + val, err := run(input) + if err != nil { + t.Errorf("Unexpected error %s (input: %s)", err, input) + } + i, ok := val.(bool) + if !ok { + t.Errorf("bool expected, got %T instead (input: %s)", val, input) + } else if i != b { + t.Errorf("%v expected, got %v instead (input: %s)", b, i, input) + } +} + +func expectString(t *testing.T, input string, s string) { + val, err := run(input) + if err != nil { + t.Errorf("unexpected error %s (input: %s)", err, input) + } + i, ok := val.(string) + if !ok { + t.Errorf("string expected, got %T instead (input: %s)", val, input) + } else if i != s { + t.Errorf("%s expected, got %s instead (input: %s)", s, i, input) + } +} + +func expectError(t *testing.T, input string, code errors.ErrCode) { + _, err := run(input) + if err == nil { + t.Errorf("expected error and didn't happen (input: %s)", input) + } + if e, ok := err.(errors.Error); !ok { + t.Errorf("error not an Error (input: %s): %s", input, err) + } else { + if e.Code != code { + t.Errorf("error %d expected, got %d -> %s (input: %s)", code, e.Code, e, input) + } + } +} + +func TestNumExpr(t *testing.T) { + expectNumber(t, "1;", 1) + expectNumber(t, "123;", 123) + expectNumber(t, "0x123;", 0x123) + expectNumber(t, "0b101;", 0b101) + expectNumber(t, "1 + 1;", 2) + expectNumber(t, "2 + 3 * 4;", 14) + expectNumber(t, "(5 - (3 - 1)) + -1;", 2) +} + +func TestInvalidOp(t *testing.T) { + // these operators are only valid for numbers + + expectError(t, "true + \"a\";", errors.InvalidOperation) + expectError(t, "\"a\" + false;", errors.InvalidOperation) + expectError(t, "\"a\" + \"a\";", errors.InvalidOperation) + expectError(t, "-false;", errors.InvalidOperation) + expectError(t, "-true;", errors.InvalidOperation) + expectError(t, "~false;", errors.InvalidOperation) + expectError(t, "~true;", errors.InvalidOperation) + expectError(t, "-\"a\";", errors.InvalidOperation) + expectError(t, "~\"a\";", errors.InvalidOperation) + + for _, op := range []string{ + "-", "+", "*", "/", "%", "&", "<<", ">>", "|", "^", "<", ">", "<=", ">=", + } { + expectError(t, fmt.Sprintf("1 %s true;", op), errors.InvalidOperation) + expectError(t, fmt.Sprintf("true %s 1;", op), errors.InvalidOperation) + expectError(t, fmt.Sprintf("1 %s \"a\";", op), errors.InvalidOperation) + expectError(t, fmt.Sprintf("\"a\" %s 1;", op), errors.InvalidOperation) + } + + // these are invalid with numbers too + expectError(t, "1 / 0;", errors.InvalidOperation) + expectError(t, "1 % 0;", errors.InvalidOperation) +} + +func TestAssign(t *testing.T) { + expectNumber(t, "var a number; a = 1;", 1) + expectString(t, "var a string = \"v\"; var b string = a; b;", "v") + expectBool(t, "var a number = 1; var b number = 2; var c number = 3; a = b = c; a == b && a== 3;", true) + expectBool(t, "var a number; var b number = a = 1; a == b && a == 1;", true) + + // local + expectNumber(t, `{ + var a number = 1; + if a != 1 { + panic("1 expected"); + } + a = 2; + if a != 2 { + panic("2 expected"); + } + println(a = 3); + a; + }`, 3) + + // scopes and shadowing + expectNumber(t, ` + var a number = 1; + if a != 1 { + panic("1 expected"); + } + { + var a number = 2; + if a != 2 { + panic("2 expected"); + } + } + a;`, 1) + + expectError(t, "a = 1;", errors.UndefinedIdent) + + // syntax error; perhaps should be in the parser + if _, err := run("var a number; (a) = 1;"); err == nil { + t.Error("error expected by didn't happen") + } + if _, err := run("var a number; a + a = 1;"); err == nil { + t.Error("error expected by didn't happen") + } +} + +func TestPrintln(t *testing.T) { + expectNumber(t, "println();", 1) + expectNumber(t, "println(1);", 2) + expectNumber(t, "println(true);", 5) + expectNumber(t, "println(\"hello\");", 6) + expectNumber(t, "println(1, \"one\");", 5) +} + +func TestEquality(t *testing.T) { + expectBool(t, "true == true;", true) + expectBool(t, "true == false;", false) + expectBool(t, "false == true;", false) + expectBool(t, "false == false;", true) + + expectBool(t, "1 == 1;", true) + expectBool(t, "0 != 1;", true) + expectBool(t, "\"a\" == \"a\";", true) + expectBool(t, "\"a\" != \"b\";", true) + + expectBool(t, "true != true;", false) + expectBool(t, "true != false;", true) + expectBool(t, "false != true;", true) + expectBool(t, "false != false;", false) + + expectError(t, "true == 1;", errors.TypeMismatch) + expectError(t, "true == \"true\";", errors.TypeMismatch) + expectError(t, "false == 0;", errors.TypeMismatch) + expectError(t, "false == \"\";", errors.TypeMismatch) + expectError(t, "false == \"false\";", errors.TypeMismatch) +} + +func TestNot(t *testing.T) { + expectBool(t, "!true;", false) + expectBool(t, "!false;", true) + + expectBool(t, "!0;", true) + expectBool(t, "!1;", false) + expectBool(t, "!\"\";", true) + expectBool(t, "!\"a\";", false) + + expectBool(t, "!!true;", true) +} + +func TestLogical(t *testing.T) { + expectBool(t, "true && true;", true) + expectBool(t, "true && false;", false) + expectBool(t, "false && true;", false) + + expectBool(t, "true || true;", true) + expectBool(t, "true || false;", true) + expectBool(t, "false || true;", true) + + expectBool(t, "1 == 1 && 1 != 2;", true) + expectBool(t, "1 != 1 || 1 != 2;", true) + + expectBool(t, "1 & 1 && 0 | 1;", true) + expectBool(t, "1 & 1 && 2 & 1;", false) + expectBool(t, "1 & 1 || 0 | 1;", true) + expectBool(t, "2 & 1 || 1 & 2;", false) +} + +func TestIfElse(t *testing.T) { + expectBool(t, "if 1 == 1 { true; }", true) + + val, err := run("if 1 != 1 { true; }") + if err != nil { + t.Errorf("unexpected error %s", err) + } + if val != nil { + t.Errorf("none expected, %T found", val) + } + + expectBool(t, "if 1 != 1 { false; } else { true; }", true) + expectBool(t, "if 1 == 1 { false; } else { true; }", false) + + expectBool(t, "if 1 != 1 { false; } else { if 1 == 1 { true; }}", true) + + val, err = run("if 1 != 1 { true; } else { if 1 != 1 { false; }}") + if err != nil { + t.Errorf("unexpected error %s", err) + } + if val != nil { + t.Errorf("none expected, %T found", val) + } +} + +func TestPanic(t *testing.T) { + _, err := run("panic(\"error\");") + if err == nil { + t.Error("error expected and didn't happen") + } +} + +func TestFunc(t *testing.T) { + expectString(t, ` + def wrap() func () string { + def inFn() string { + return "output"; + } + return inFn; + } + var fn func () string = wrap(); + fn(); + `, "output") + + expectNumber(t, ` + def makeCounter() func () number { + var i number; + def count() number { + i = i + 1; + return i; + } + return count; + } + makeCounter()(); + `, 1) + + expectBool(t, ` + // why not? + def loop(from number, to number, fn func (number)) { + if from > to { + return; + } + fn(from); + // return is needed to trigger the tail-call optimization + return loop(from + 1, to, fn); + } + + def wilsonPrime(n number) bool { + var acc number = 1; + def fact(i number) { + acc = (acc * i) % n; + } + loop(2, n - 1, fact); + return acc == n - 1; + } + + wilsonPrime(1789); + `, true) + + expectNumber(t, ` + var a number = 1; + { + var c number; + def add() { + c = c + a; + println(c, " ", a); + } + add(); // c is 1 + var a string; + add(); // c is 2 + println(c); + c; + } + `, 2) + + expectBool(t, ` + var a bool; + def fn() func () { + def inFn() { + println("works"); + a = true; + } + return inFn; + } + fn()(); + a;`, true) + + expectBool(t, ` + var a bool; + def fn(a bool) bool { + return a; + } + fn(true);`, true) + + expectString(t, ` + def fn() { } + { + def fn() { } + } + "OK";`, "OK") + +} + +func TestFuncError(t *testing.T) { + expectString(t, ` + var fn func () bool; + "OK";`, "OK") + + expectError(t, ` + var fn func () bool; + fn();`, errors.NotCallable) + + expectError(t, ` + def a() bool { return true; } + var b func () number = a; + `, errors.TypeMismatch) + + expectError(t, ` + // return with no function + return false; + `, errors.InvalidOperation) + + expectError(t, ` + def fn() bool { return 0; } + fn(); + `, errors.TypeMismatch) + + expectError(t, ` + def fn() bool { } + fn(); + `, errors.NoReturn) + + expectError(t, "true();", errors.NotCallable) + expectError(t, "\"bad\"();", errors.NotCallable) + expectError(t, "10();", errors.NotCallable) + expectError(t, "true(1);", errors.NotCallable) + expectError(t, "\"bad\"(1);", errors.NotCallable) + expectError(t, "10(1);", errors.NotCallable) + + expectError(t, ` + var fn func() number = println; + fn("hello");`, errors.CallError) +} + +func TestFor(t *testing.T) { + expectString(t, ` + for false { + panic("oops"); + } + "OK"; + `, "OK") + + // the value emited by the loops is the last + // evaluated value inside the loop + expectNumber(t, ` + var i number; + for i < 10 { + i = i + 1; + } + `, 10) + + expectNumber(t, ` + def mkGen() func () number { + var n number; + def gen() number { + n = n + 1; + return n; + } + return gen; + } + var c func() number = mkGen(); + var acc number; + for c() < 10 { + acc = acc + 1; + } + acc; + `, 9) + + expectNumber(t, ` + var acc number; + var j number; + for j < 10 { + var i number; + for i < 10 { + i = i + 1; + acc = acc + 1; + } + j = j + 1; + } + acc; + `, 100) + + expectError(t, ` + var a number; + for { + a = 1 / a; + } + `, errors.InvalidOperation) +} + +func TestForIn(t *testing.T) { + // the value emited by the loops is the last + // evaluated value inside the loop + expectNumber(t, ` + var arr [3]number = [1, 2, 10]; + for i in arr { + i; + } + `, 10) + + // shadow the array variable + expectNumber(t, ` + var a [3]number = [1, 2, 10]; + for a in a { + println(a); + a; + } + `, 10) + + expectNumber(t, ` + var acc number; + var j [10]number; + for i in j { + for i in j { + acc = acc + 1; + } + } + acc; + `, 100) +} + +func TestContinue(t *testing.T) { + expectString(t, ` + var a number; + for a < 10 { + a = a + 1; + if a & 1 { + continue; + panic("oops"); + } + println(a); + } + "OK"; + `, "OK") + + expectString(t, ` + var arr [5]number = [1, 2, 3, 4, 5]; + for a in arr { + if a & 1 { + continue; + panic("oops"); + } + println(a); + } + "OK"; + `, "OK") + + expectNumber(t, ` + var acc number; + var a number; + for a < 10 { + var b number; + for b < 10 { + b = b + 1; + if b & 1 { + continue; + panic("oops"); + } + acc = acc + b; + } + a = a + 1; + } + acc; + `, (2+4+6+8+10)*10) +} + +func TestBreak(t *testing.T) { + expectString(t, ` + for { + break; + panic("oops"); + } + "OK"; + `, "OK") + + expectString(t, ` + var a [10] number; + for i in a { + break; + panic("oops"); + } + "OK"; + `, "OK") + + expectNumber(t, ` + var a number; + for { + if a == 20 { + break; + } + a = a + 1; + for true { + a = a + 1; + break; + a = a + 1; + } + } + a; + `, 20) +} + +func TestArray(t *testing.T) { + expectError(t, "var a [0]number;", errors.InvalidLength) + expectError(t, "var a [1]number; a[-1];", errors.InvalidIndex) + expectError(t, "var a [1]number; a[2];", errors.InvalidIndex) + expectError(t, "var a [1]number; a[-1] = 1;", errors.InvalidIndex) + expectError(t, "var a [1]number; a[2] = 1;", errors.InvalidIndex) + expectError(t, "var a [5][5]number;", errors.InvalidType) + expectError(t, "var a number; len(a);", errors.TypeMismatch) + + expectNumber(t, ` + var arr [5]number = [1, 2, 3, 4, 5]; + + var i number = 1; + for i < len(arr) { + arr[0] = arr[0] + arr[i]; + i = i + 1; + } + arr[0]; + `, 1+2+3+4+5) + + expectString(t, ` + var arr [3]string = ["one", "two", "three"]; + var arr2 [3]string; + + var i number; + for i < len(arr) { + arr2[i] = arr[i]; + i = i + 1; + } + i = 0; + for i < len(arr) { + if arr2[i] != arr[i] { + panic("failed"); + } + i = i + 1; + } + "OK"; + `, "OK") + + expectBool(t, ` + var a [5]bool; + def fn(v [5]bool) { + v[2] = true; + } + fn(a); + println(a); + a[2]; + `, true) + + expectNumber(t, ` + const a [3]number = [1, 2, 3]; + const b [3]number = [3, 2, 1]; + const c [3]number = a; + c = b; + c[0]; + `, 3) + + expectNumber(t, ` + var acc number; + + def a() { + println("this is a"); + acc = acc + 10; + } + + def b() { + println("this is b"); + acc = acc + 10; + } + + var arr [2]func () = [a, b]; + + for i in arr { + i(); + } + acc;`, 20) +} + +func TestErrorTag(t *testing.T) { + expectString(t, ` + def fn() { return !?; } + if ? fn() { + "KO"; + } + `, "KO") + expectString(t, ` + def fn() { return; } + if ? fn() { + "KO"; + } else { + "OK"; + } + `, "OK") + expectString(t, ` + def fn() number { return !? 0; } + if ? fn() { + "KO"; + } + `, "KO") + expectString(t, ` + def fn() number { return 0; } + if !(? fn()) { + "OK"; + } + `, "OK") + expectNumber(t, ` + def fn() number { return !? 99; } + var n number; + if ? (n = fn()) { + n; + } + `, 99) + expectString(t, ` + def fn() { return !?; } + fn(); + // ? resets the error flag before evaluating the right side + if ? true { + "KO"; + } else { + "OK"; + } + `, "OK") +} + +func TestStruct(t *testing.T) { + expectString(t, ` + def A { var p string = "OK"; } + var a A; + a.p; + `, "OK") + expectNumber(t, ` + def A { var p number = 10; } + var a A; + a.p; + `, 10) + expectNumber(t, ` + def A { var p number; } + var a A; + a.p = 10; + a.p; + `, 10) + expectNumber(t, ` + def A { var p number = 10; } + var a A; + a.p = 100; + var b A; + b.p; + `, 10) + expectNumber(t, ` + def A { var p number = 10; } + var a [3]A; + a[0].p + a[1].p + a[2].p; + `, 30) + expectNumber(t, ` + def A { + var arr [3]number = [1, 2, 3]; + + def p(index number) number { + return arr[index]; + } + } + var a A; + a.p(0) + a.p(1) + a.p(2); + `, 6) + expectNumber(t, ` + def A { + var p number; + } + def B { + var arr [3]A; + var i number; + + def new() A { + var r A = arr[i]; + r.p = i + 1; + i = (i + 1) % 3; + return r; + } + } + var b B; + b.new().p + b.new().p + b.new().p; + // new should have returned a reference + b.arr[0].p + b.arr[1].p + b.arr[2].p; + `, 6) + + expectError(t, ` + def A { + var a A; + } + `, errors.RecursiveStruct) +} diff --git a/micro-vim/ftdetect/micro.vim b/micro-vim/ftdetect/micro.vim new file mode 100644 index 0000000..cd69d70 --- /dev/null +++ b/micro-vim/ftdetect/micro.vim @@ -0,0 +1,3 @@ +autocmd BufNewFile,BufRead *.micro set filetype=micro +autocmd BufNewFile,BufRead *.cro set filetype=micro + diff --git a/micro-vim/ftplugin/micro.vim b/micro-vim/ftplugin/micro.vim new file mode 100644 index 0000000..ea3ce5a --- /dev/null +++ b/micro-vim/ftplugin/micro.vim @@ -0,0 +1,26 @@ + +if exists("b:did_ftplugin") + finish +endif +let b:did_ftplugin = 1 + +let s:save_cpo = &cpo +set cpo-=C + +setlocal suffixesadd+=.micro,.cro +setlocal comments=b://,fb:- +setlocal commentstring=//\ %s + +setlocal smartindent cindent + +setlocal tabstop=4 shiftwidth=4 softtabstop=4 expandtab + +" run make editing a micro file and it will be parsed for errors +setlocal makeprg=micro\ -parse\ % +setlocal errorformat=%f:%l\ col\ %c\ %trror:\ %m + +let b:undo_ftplugin="setl com<" + +let &cpo = s:save_cpo +unlet s:save_cpo + diff --git a/micro-vim/syntax/micro.vim b/micro-vim/syntax/micro.vim new file mode 100644 index 0000000..f620579 --- /dev/null +++ b/micro-vim/syntax/micro.vim @@ -0,0 +1,90 @@ +if exists("b:current_syntax") + finish +endif + +let s:cpo_save = &cpo +set cpo&vim + +syn clear +syn sync fromstart + +syn match microNumber /\<[0-9]\d*\>/ + +syn match microEscapeHex /\\x\x{2}/ transparent contained +syn match microEscapeChar /\\[nt\\']/ transparent contained +syn match microEscapeString /\\[nt\\"]/ transparent contained + +syn cluster microCharacterGroup contains=microEscapeChar,microEscapeHex +syn region microCharacter start=/'/ skip=/\\\\\|\\'/ end=/'/ contains=@microCharacterGroup + +syn cluster microStringGroup contains=microEscapeString,microEscapeHex +syn region microString start=/"/ skip=/\\\\\|\\"/ end=/"/ contains=@Spell,microStringGroup + +syn keyword microConditional if else continue break +syn keyword microRepeat for in +syn keyword microType number bool string func +syn keyword microBoolean true false +syn keyword microKeyword len +syn keyword microStatement return nextgroup=microTagE skipwhite skipempty + +syn match microTagE /!?/ display contained + +"" this is probably not correct +syn match microOperator display "?\|+\|-\|/\|*\|=\|=\|\^\|&\||\|!\|!=\|>\|>=\|<\|<=\|\~\|%" +syn match microOperator display "&&\|||" + +syn keyword microKeyword def nextgroup=microFuncName skipwhite skipempty +syn match microFuncName /[_a-zA-Z][_a-zA-Z0-9]*/ nextgroup=microFuncParams display contained +syn region microFuncParams start=/(/ end=/)/ contains=microVarIdent,microNumber,microChar nextgroup=microVarType,microVarArray skipwhite contained + +syn match mucroPropStart /\./ nextgroup=microProp skipwhite +syn match microProp /[_a-zA-Z][_a-zA-Z0-9]*/ display contained + +syn keyword microKeyword var nextgroup=microVarIdent skipwhite skipempty +syn keyword microKeyword const nextgroup=microVarIdent skipwhite skipempty +syn match microVarIdent /[_a-zA-Z][_a-zA-Z0-9]*/ nextgroup=microVarType,microVarArray skipwhite +syn match microVarArray /\[[0-9]*\]/ nextgroup=microVarType transparent contained skipwhite +syn match microVarType /[_a-zA-Z][_a-zA-Z0-9]*/ display contained + +syn match microCall /[_a-zA-Z][_a-zA-Z0-9]*(/he=e-1,me=e-1 + +syn region microIndex start=/\[/ end=/\]/ contains=microNumber,microChar,microIndexIdent,microIndexCall +syn match microIndexIdent /[_a-zA-Z][_a-zA-Z0-9]*/ display contained +syn match microIndexCall /[_a-zA-Z][_a-zA-Z0-9]*(/he=e-1,me=e-1 display contained + +syn keyword microBuiltin println panic + +syn region microBlock start="{" end="}" transparent fold + +syn keyword microCommentTodo TODO FIXME XXX BUG contained +syn match microLineComment "\/\/.*" contains=@Spell,microCommentTodo + +hi def link microLineComment Comment +hi def link microCommentTodo Todo +hi def link microNumber Number +hi def link microCharacter Character +hi def link microString String +hi def link microStrEmbeddedQuote String +hi def link microConditional Conditional +hi def link microRepeat Repeat +hi def link microOperator Operator +hi def link microType Type +hi def link microVarType Type +hi def link microBoolean Boolean +hi def link microKeyword Keyword +hi def link microStatement Statement +hi def link microTagE Keyword + +hi def link microBuiltin Function +hi def link microFuncName Function +hi def link microCall Function +hi def link microProp Identifier + +hi def link microIndexIdent Number +hi def link microIndexCall Function + +let b:current_syntax = "micro" + +let &cpo = s:cpo_save +unlet s:cpo_save + diff --git a/parser/parser.go b/parser/parser.go new file mode 100644 index 0000000..65007b7 --- /dev/null +++ b/parser/parser.go @@ -0,0 +1,1168 @@ +package parser + +import ( + "fmt" + "strconv" + + "usebox.net/lang/ast" + "usebox.net/lang/errors" + "usebox.net/lang/tokens" +) + +// XXX: duplicated +const runtime = "" + +type Env struct { + env map[string]ast.Type + parent *Env +} + +func NewEnv(parent *Env) *Env { + return &Env{make(map[string]ast.Type), parent} +} + +func (e *Env) Get(key string, local bool) (ast.Type, bool) { + if v, ok := e.env[key]; ok { + return v, true + } + if !local && e.parent != nil { + return e.parent.Get(key, local) + } + return ast.TypeNone, false +} + +func (e *Env) Set(key string, typ ast.Type) *Env { + n := e + if _, ok := e.Get(key, false); ok { + n = NewEnv(e) + } + n.env[key] = typ + return n +} + +type Parser struct { + tokens []tokens.Token + pos int + env *Env + structs map[string]*Env + currFuncRet *ast.Type + currLoop bool +} + +func NewParser(builtIns map[string]ast.Type) Parser { + global := NewEnv(nil) + for k, v := range builtIns { + // not shadowing any value + global.Set(k, v) + } + + return Parser{env: global, structs: make(map[string]*Env)} +} + +func (p *Parser) Parse(tokens []tokens.Token) ([]ast.Expr, error) { + var errs []error + var exprs []ast.Expr + + p.tokens = tokens + p.pos = 0 + + for !p.end() { + expr, err := p.statement() + if err != nil { + errs = append(errs, err) + p.recover() + continue + } + exprs = append(exprs, expr) + } + + if len(errs) > 0 { + return nil, errors.ErrorList{Errors: errs} + } + + return exprs, nil +} + +func (p *Parser) recover() { + p.next() + + for !p.end() { + if p.prev().Id == tokens.Semicolon { + return + } + if p.match([]tokens.TokenId{tokens.Def, tokens.Var, tokens.Const, tokens.If, tokens.Return, tokens.For}) { + p.back() + return + } + + p.next() + } +} + +func (p *Parser) next() tokens.Token { + if !p.end() { + p.pos++ + return p.prev() + } + return p.curr() +} + +func (p *Parser) back() { + if p.pos != 0 { + p.pos-- + } +} + +func (p *Parser) prev() tokens.Token { + return p.tokens[p.pos-1] +} + +func (p *Parser) curr() tokens.Token { + return p.tokens[p.pos] +} + +func (p *Parser) end() bool { + return p.curr().Id == tokens.Eof +} + +func (p *Parser) check(id tokens.TokenId) bool { + if p.end() { + return false + } + return p.curr().Id == id +} + +func (p *Parser) match(ids []tokens.TokenId) bool { + for _, t := range ids { + if p.check(t) { + p.next() + return true + } + } + return false +} + +func (p *Parser) statement() (ast.Expr, error) { + if p.match([]tokens.TokenId{tokens.Var, tokens.Const}) { + return p.declaration() + } + + if p.check(tokens.Break) { + p.next() + return p.breakStmt() + } + + if p.check(tokens.Continue) { + p.next() + return p.continueStmt() + } + + if p.check(tokens.Return) { + p.next() + return p.returnStmt() + } + + if p.check(tokens.For) { + p.next() + return p.forStmt() + } + + if p.check(tokens.If) { + return p.ifElse() + } + + if p.check(tokens.LBrace) { + return p.block() + } + + if p.check(tokens.Def) { + p.next() + return p.definition() + } + + expr, err := p.expr() + if err != nil { + return nil, err + } + if p.next().Id != tokens.Semicolon { + return nil, errors.NewError(errors.ExpectedSemicolon, p.prev().Loc, "';' expected after expression") + } + return expr, nil +} + +func (p *Parser) expr() (ast.Expr, error) { + return p.assign() +} + +func (p *Parser) typ() (*ast.Type, error) { + if p.check(tokens.LBracket) { + tok := p.next() + + size, err := p.primary() + if err != nil { + return nil, err + } + + if p.next().Id != tokens.RBracket { + return nil, errors.NewError(errors.ExpectedRBracket, p.prev().Loc, "']' expected in array definition") + } + + typ, err := p.typ() + if err != nil { + return nil, err + } + + if typ.Value.Id == tokens.TArray { + return nil, errors.NewError(errors.InvalidType, p.prev().Loc, "invalid array type", typ.String()) + } + + if size.Resolve().Compare(&ast.TypeNumber) { + if v, ok := size.(ast.Literal); ok { + tok.Id = tokens.TArray + intval := int(v.Numval) + return &ast.Type{Value: tok, Ret: typ, Len: &intval}, nil + } + } + + return nil, errors.NewError(errors.InvalidValue, p.prev().Loc, "invalid value for array size") + } + + if !p.match([]tokens.TokenId{tokens.TNumber, tokens.TBool, tokens.TString, tokens.TFunc, tokens.Ident}) { + return nil, errors.NewError(errors.ExpectedType, p.curr().Loc, "expected type") + } + typ := p.prev() + + if typ.Id == tokens.Ident { + typ, ok := p.env.Get(typ.Value, false) + if !ok || typ.Value.Id != tokens.TStruct { + return nil, errors.NewError(errors.ExpectedType, p.curr().Loc, "expected type") + } + if typ.Inst { + return nil, errors.NewError(errors.ExpectedType, p.curr().Loc, "expected type found instance of", typ.String()) + } + // this is an instance of the struct + typ.Inst = true + return &typ, nil + } + + if typ.Id != tokens.TFunc { + return &ast.Type{Value: typ}, nil + } + + if p.next().Id != tokens.LParen { + return nil, errors.NewError(errors.ExpectedLParen, p.prev().Loc, "'(' expected in function type") + } + + params := make([]ast.Type, 0, 16) + for !p.check(tokens.RParen) { + typ, err := p.typ() + if err != nil { + return nil, err + } + + params = append(params, *typ) + if !p.check(tokens.Comma) { + break + } + p.next() + } + p.next() + + var ret *ast.Type + if !p.check(tokens.Semicolon) && !p.check(tokens.Assign) && !p.check(tokens.RParen) && !p.check(tokens.LBrace) { + if typ, err := p.typ(); err != nil { + return nil, err + } else { + ret = typ + } + } + + return &ast.Type{Value: typ, Params: params, Ret: ret}, nil +} + +func (p *Parser) definition() (ast.Expr, error) { + name := p.next() + if name.Id != tokens.Ident { + return nil, errors.NewError(errors.ExpectedIdent, name.Loc, "expected identifier") + } + + if p.check(tokens.LBrace) { + return p.strctDecl(name) + } + + return p.funcDecl(name) +} + +func (p *Parser) strctDecl(name tokens.Token) (ast.Expr, error) { + if v, ok := p.env.Get(name.Value, true); ok { + loc := v.Value.Loc.String() + if v.Value.Loc.File == "" { + loc = runtime + } + return nil, errors.NewError(errors.AlreadyDeclared, name.Loc, "struct", name.String(), "already declared in", loc) + } + + p.env = p.env.Set(name.Value, *ast.NewStructType(name)) + pEnv := p.env + sEnv := NewEnv(pEnv) + p.env = sEnv + + expr, err := p.strctBlock() + if err != nil { + return nil, err + } + + if len(sEnv.env) == 0 { + return nil, errors.NewError(errors.InvalidValue, name.Loc, "empty struct") + } + + p.structs[name.Value] = sEnv + p.env = pEnv + + return ast.Struct{Name: name, Body: expr.(ast.Block)}, nil +} + +func (p *Parser) funcDecl(name tokens.Token) (ast.Expr, error) { + if p.next().Id != tokens.LParen { + return nil, errors.NewError(errors.ExpectedLParen, p.prev().Loc, "'(' expected in function declaration") + } + + params := make([]ast.Var, 0, 16) + paramTypes := make([]ast.Type, 0, 16) + seen := map[string]tokens.Location{} + for !p.check(tokens.RParen) { + pname := p.next() + if pname.Id != tokens.Ident { + return nil, errors.NewError(errors.ExpectedIdent, pname.Loc, "expected parameter name") + } + + typ, err := p.typ() + if err != nil { + return nil, err + } + + if prev, ok := seen[pname.Value]; ok { + return nil, errors.NewError(errors.AlreadyDeclared, pname.Loc, "parameter", pname.String(), "already declared in", prev.String()) + } + seen[pname.Value] = pname.Loc + + params = append(params, ast.Var{Name: pname, Type: *typ}) + paramTypes = append(paramTypes, *typ) + if !p.check(tokens.Comma) { + break + } + p.next() + } + p.next() + + var ret *ast.Type + if !p.check(tokens.LBrace) { + if typ, err := p.typ(); err != nil { + return nil, err + } else { + ret = typ + } + } + + if v, ok := p.env.Get(name.Value, true); ok { + loc := v.Value.Loc.String() + if v.Value.Loc.File == "" { + loc = runtime + } + return nil, errors.NewError(errors.AlreadyDeclared, name.Loc, "function", name.String(), "already declared in", loc) + } + p.env = p.env.Set(name.Value, *ast.NewFuncType(name, paramTypes, ret)) + + oldFuncRet := p.currFuncRet + if ret != nil { + p.currFuncRet = ret + } else { + p.currFuncRet = &ast.TypeNone + } + defer func() { + p.currFuncRet = oldFuncRet + }() + + for _, v := range params { + p.env = p.env.Set(v.Name.Value, v.Type) + } + + expr, err := p.block() + if err != nil { + return nil, err + } + + // return checks? + return ast.Func{Name: name, Params: params, Ret: ret, Body: expr.(ast.Block)}, nil +} + +func (p *Parser) exprList() (ast.Expr, error) { + if p.check(tokens.LBracket) { + p.next() + vals := make([]ast.Expr, 0, 16) + if !p.check(tokens.RBracket) { + var prev ast.Expr + for { + val, err := p.expr() + if err != nil { + return nil, err + } + if prev != nil && !prev.Resolve().Compare(val.Resolve()) { + return nil, errors.NewError(errors.TypeMismatch, p.prev().Loc, "type mismatch in expression list: found", val.Resolve().String(), "expected", prev.Resolve().String()) + } + prev = val + + vals = append(vals, val) + if !p.check(tokens.Comma) { + break + } + p.next() + } + if !p.check(tokens.RBracket) { + return nil, errors.NewError(errors.ExpectedRBracket, p.prev().Loc, "']' expected to end expression list") + } + } + p.next() + if len(vals) == 0 { + return nil, errors.NewError(errors.InvalidValue, p.prev().Loc, "empty expression list") + } + + return ast.ExprList{Exprs: vals}, nil + } + + return p.expr() +} + +func (p *Parser) decl(con bool) (ast.Expr, error) { + name := p.next() + if name.Id != tokens.Ident { + return nil, errors.NewError(errors.ExpectedIdent, name.Loc, "expected identifier") + } + + typ, err := p.typ() + if err != nil { + return nil, err + } + + if typ.Value.Id == tokens.TStruct { + if _, ok := p.structs[typ.Value.Value]; !ok { + return nil, errors.NewError(errors.RecursiveStruct, p.curr().Loc, "recursive definition of", typ.String()) + } + } + + var initv ast.Expr + if p.check(tokens.Assign) { + p.next() + var err error + initv, err = p.exprList() + if err != nil { + return nil, err + } + } + + if p.next().Id != tokens.Semicolon { + return nil, errors.NewError(errors.ExpectedSemicolon, p.prev().Loc, "';' expected after expression") + } + + if v, ok := p.env.Get(name.Value, true); ok { + loc := v.Value.Loc.String() + if v.Value.Loc.File == "" { + loc = runtime + } + return nil, errors.NewError(errors.AlreadyDeclared, name.Loc, name.String(), "already declared in", loc) + } + + if initv != nil { + if v, ok := initv.(ast.ExprList); ok { + if typ.Value.Id != tokens.TArray { + return nil, errors.NewError(errors.InvalidValue, name.Loc, "expression list to initialize a type", typ.String()) + } + if *typ.Len != len(v.Exprs) { + return nil, errors.NewError(errors.InvalidValue, name.Loc, "invalid number of elements in expression list to initialize a type", typ.String()) + } + if !typ.Ret.Compare(initv.Resolve()) { + return nil, errors.NewError(errors.TypeMismatch, name.Loc, "type mismatch expected list of", typ.Ret.String(), "found", initv.Resolve().String()) + } + } else if !typ.Compare(initv.Resolve()) { + return nil, errors.NewError(errors.TypeMismatch, name.Loc, "type mismatch expected", typ.String(), "found", initv.Resolve().String()) + } + if initv.Resolve().Value.Id == tokens.TArray || initv.Resolve().Value.Id == tokens.TStruct { + if !con && initv.Resolve().ResolveConst() { + return nil, errors.NewError(errors.TypeMismatch, name.Loc, "variable cannot be used as referece to constant") + } + typ.Ref = true + } + } + if typ.Value.Id == tokens.TStruct { + // it is an instance + p.structs[name.Value] = p.structs[typ.Value.Value] + typ.Inst = true + } + if con { + if initv == nil { + return nil, errors.NewError(errors.ExpectedAssign, name.Loc, "expected assignation in constant declaration") + } + if typ.Value.Id == tokens.TArray { + // allow changing reference but not the values of the array + typ.Ret.Const = true + } else { + typ.Const = true + } + } + + p.env = p.env.Set(name.Value, *typ) + return ast.Var{Name: name, Type: *typ, Initv: initv}, nil +} + +func (p *Parser) declaration() (ast.Expr, error) { + if p.prev().Id == tokens.Const { + return p.decl(true) + } + return p.decl(false) +} + +func (p *Parser) continueStmt() (ast.Expr, error) { + if p.next().Id != tokens.Semicolon { + return nil, errors.NewError(errors.ExpectedSemicolon, p.prev().Loc, "';' expected after expression") + } + + if !p.currLoop { + return nil, errors.NewError(errors.InvalidOperation, p.curr().Loc, "continue without loop") + } + + return ast.Continue{Loc: p.curr().Loc}, nil +} + +func (p *Parser) breakStmt() (ast.Expr, error) { + if p.next().Id != tokens.Semicolon { + return nil, errors.NewError(errors.ExpectedSemicolon, p.prev().Loc, "';' expected after expression") + } + + if !p.currLoop { + return nil, errors.NewError(errors.InvalidOperation, p.curr().Loc, "break without loop") + } + + return ast.Break{Loc: p.curr().Loc}, nil +} + +func (p *Parser) returnStmt() (ast.Expr, error) { + if p.currFuncRet == nil { + return nil, errors.NewError(errors.InvalidOperation, p.prev().Loc, "return without function call") + } + + var isError bool + if p.check(tokens.TagE) { + p.next() + isError = true + } + + if p.check(tokens.Semicolon) { + p.next() + if p.currFuncRet.Value.Id != tokens.None { + return nil, errors.NewError(errors.TypeMismatch, p.prev().Loc, "type mismatch in return value, expected", p.currFuncRet.String()) + } + return ast.Return{Loc: p.curr().Loc, Error: isError}, nil + } + + expr, err := p.expr() + if err != nil { + return nil, err + } + + if p.next().Id != tokens.Semicolon { + return nil, errors.NewError(errors.ExpectedSemicolon, p.prev().Loc, "';' expected after expression") + } + + exprRet := expr.Resolve() + if exprRet == nil { + exprRet = &ast.TypeNone + } else { + if exprRet.Value.Id == tokens.TArray || exprRet.Value.Id == tokens.TStruct { + if v, ok := expr.(ast.Variable); ok { + if val, ok := p.env.Get(v.Name.Value, true); ok && !val.Ref { + return nil, errors.NewError(errors.InvalidOperation, p.prev().Loc, "returning a local value of type", exprRet.String()) + } + } + } + } + + if !p.currFuncRet.Compare(exprRet) { + return nil, errors.NewError(errors.TypeMismatch, p.prev().Loc, "type mismatch in return value, expected", p.currFuncRet.String(), "found", exprRet.String()) + } + + return ast.Return{Value: expr, Loc: p.prev().Loc, Error: isError}, nil +} + +func (p *Parser) forBody() (ast.Expr, error) { + oldCurrLoop := p.currLoop + p.currLoop = true + defer func() { + p.currLoop = oldCurrLoop + }() + + return p.block() +} + +func (p *Parser) forStmt() (ast.Expr, error) { + var expr ast.Expr + var err error + + if !p.check(tokens.LBrace) { + if p.check(tokens.Ident) { + ident := p.next() + if p.check(tokens.In) { + p.next() + expr, err = p.expr() + if err != nil { + return nil, err + } + if expr.Resolve().Value.Id != tokens.TArray { + return nil, errors.NewError(errors.TypeMismatch, p.next().Loc, "expected array in for-in statement found", expr.Resolve().String()) + } + + pEnv := p.env + p.env = NewEnv(pEnv) + defer func() { + p.env = pEnv + }() + p.env = p.env.Set(ident.Value, *expr.Resolve().Ret) + + stmts, err := p.forBody() + if err != nil { + return nil, err + } + return ast.ForIn{Name: ident, Expr: expr, Stmts: stmts}, nil + } + p.back() + } + + expr, err = p.expr() + if err != nil { + return nil, err + } + } + + stmts, err := p.forBody() + if err != nil { + return nil, err + } + return ast.For{Cond: expr, Stmts: stmts}, nil +} + +func (p *Parser) ifElse() (ast.Expr, error) { + ifTok := p.next() + expr, err := p.expr() + if err != nil { + return nil, err + } + ifBlock, err := p.block() + if err != nil { + return nil, err + } + + var elseBlock ast.Expr + if p.check(tokens.Else) { + p.next() + elseBlock, err = p.block() + if err != nil { + return nil, err + } + } else { + elseBlock = ast.Literal{Value: tokens.Token{Id: tokens.None, Loc: ifTok.Loc}} + } + return ast.IfElse{Cond: expr, True: ifBlock, False: elseBlock}, nil +} + +func (p *Parser) strctBlock() (ast.Expr, error) { + definitions := make([]ast.Expr, 0, 256) + + if p.next().Id != tokens.LBrace { + return nil, errors.NewError(errors.ExpectedLBrace, p.prev().Loc, "'{' expected to open block") + } + + for !p.check(tokens.RBrace) && !p.end() { + if p.check(tokens.Def) { + p.next() + decl, err := p.definition() + if err != nil { + return nil, err + } + definitions = append(definitions, decl) + continue + } + if p.match([]tokens.TokenId{tokens.Var, tokens.Const}) { + decl, err := p.declaration() + if err != nil { + return nil, err + } + definitions = append(definitions, decl) + continue + } + return nil, errors.NewError(errors.ExpectedDefinition, p.next().Loc, "expected definition in a struct block") + } + + next := p.next() + if next.Id != tokens.RBrace { + // MUST be EOF + return nil, errors.NewError(errors.ExpectedRBrace, next.Loc, "'}' expected to close block, end of file found") + } + + return ast.Block{Stmts: definitions}, nil +} + +func (p *Parser) block() (ast.Expr, error) { + pEnv := p.env + p.env = NewEnv(pEnv) + defer func() { + p.env = pEnv + }() + + statements := make([]ast.Expr, 0, 256) + + if p.next().Id != tokens.LBrace { + return nil, errors.NewError(errors.ExpectedLBrace, p.prev().Loc, "'{' expected to open block") + } + + for !p.check(tokens.RBrace) && !p.end() { + decl, err := p.statement() + if err != nil { + return nil, err + } + statements = append(statements, decl) + } + + next := p.next() + if next.Id != tokens.RBrace { + // MUST be EOF + return nil, errors.NewError(errors.ExpectedRBrace, next.Loc, "'}' expected to close block, end of file found") + } + + return ast.Block{Stmts: statements}, nil +} + +func (p *Parser) assign() (ast.Expr, error) { + expr, err := p.or() + if err != nil { + return nil, err + } + + if p.check(tokens.Assign) { + tok := p.next() + value, err := p.expr() + if err != nil { + return nil, err + } + + left := expr + env := p.env + + if obj, ok := expr.(ast.GetExpr); ok { + oType := obj.Object.Resolve() + sEnv, ok := p.structs[oType.Value.Value] + if !ok { + // shouldn't happen + return nil, errors.NewError(errors.Unexpected, oType.Value.Loc, "undeclared struct", oType.String()) + } + env = sEnv + expr = obj.Expr + } + + v, ok := expr.(ast.Variable) + if !ok { + return nil, errors.NewError(errors.InvalidTarget, tok.Loc, "invalid assignment target") + } + + typ, ok := env.Get(v.Name.Value, false) + if !ok { + return nil, errors.NewError(errors.UndefinedIdent, v.Name.Loc, "undeclared variable", v.String()) + } + if typ.Value.Id == tokens.TArray && v.Index != nil { + typ = *typ.Ret + } + if !typ.Compare(value.Resolve()) { + return nil, errors.NewError(errors.InvalidValue, tok.Loc, "invalid value in assignation, expected", typ.String(), "found", value.Resolve().String()) + } + if typ.Value.Id == tokens.TArray || typ.Value.Id == tokens.TStruct { + if !typ.Ref { + return nil, errors.NewError(errors.TypeMismatch, v.Name.Loc, "variable cannot be converted to a reference", v.String()) + } + if !typ.ResolveConst() && value.Resolve().ResolveConst() { + return nil, errors.NewError(errors.TypeMismatch, v.Name.Loc, "variable cannot be used as referece to constant") + } + } + if typ.Const { + return nil, errors.NewError(errors.AssignConst, v.Name.Loc, "value cannot be assigned to constant") + } + return ast.Assign{Left: left, Right: value, Loc: tok.Loc}, nil + } + + return expr, nil +} + +func (p *Parser) or() (ast.Expr, error) { + expr, err := p.and() + if err != nil { + return nil, err + } + + if p.check(tokens.Or) { + op := p.next() + right, err := p.and() + if err != nil { + return nil, err + } + expr = ast.Binary{Left: expr, Op: op, Right: right, Type: &ast.TypeBool} + } + + return expr, nil +} + +func (p *Parser) and() (ast.Expr, error) { + expr, err := p.equality() + if err != nil { + return nil, err + } + + if p.check(tokens.And) { + op := p.next() + right, err := p.equality() + if err != nil { + return nil, err + } + expr = ast.Binary{Left: expr, Op: op, Right: right, Type: &ast.TypeBool} + } + + return expr, nil +} + +func (p *Parser) equality() (ast.Expr, error) { + expr, err := p.comparison() + if err != nil { + return nil, err + } + + for p.match([]tokens.TokenId{tokens.Eq, tokens.Ne}) { + op := p.prev() + right, err := p.comparison() + if err != nil { + return nil, err + } + + if !expr.Resolve().Compare(right.Resolve()) { + return nil, errors.NewError(errors.TypeMismatch, op.Loc, "type mistmach expected", expr.Resolve().String(), "found", right.Resolve().String()) + } + + expr = ast.Binary{Left: expr, Op: op, Right: right, Type: &ast.TypeBool} + } + + return expr, nil +} + +func (p *Parser) comparison() (ast.Expr, error) { + expr, err := p.bitterm() + if err != nil { + return nil, err + } + + for p.match([]tokens.TokenId{tokens.Gt, tokens.Ge, tokens.Lt, tokens.Le}) { + op := p.prev() + right, err := p.bitterm() + if err != nil { + return nil, err + } + + if !expr.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", expr.Resolve().String()) + } + if !right.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String()) + } + + expr = ast.Binary{Left: expr, Op: op, Right: right, Type: &ast.TypeBool} + } + + return expr, nil +} + +func (p *Parser) bitterm() (ast.Expr, error) { + expr, err := p.term() + if err != nil { + return nil, err + } + + for p.match([]tokens.TokenId{tokens.BitShl, tokens.BitShr, tokens.BitOr, tokens.BitXor}) { + op := p.prev() + right, err := p.term() + if err != nil { + return nil, err + } + + if !expr.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", expr.Resolve().String()) + } + if !right.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String()) + } + + expr = ast.Binary{Left: expr, Op: op, Right: right} + } + + return expr, nil +} + +func (p *Parser) term() (ast.Expr, error) { + expr, err := p.bitfactor() + if err != nil { + return nil, err + } + + for p.match([]tokens.TokenId{tokens.Sub, tokens.Add}) { + op := p.prev() + right, err := p.bitfactor() + if err != nil { + return nil, err + } + + if !expr.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", expr.Resolve().String()) + } + if !right.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String()) + } + + expr = ast.Binary{Left: expr, Op: op, Right: right} + } + + return expr, nil +} + +func (p *Parser) bitfactor() (ast.Expr, error) { + expr, err := p.factor() + if err != nil { + return nil, err + } + + if p.check(tokens.BitAnd) { + op := p.next() + right, err := p.factor() + if err != nil { + return nil, err + } + + if !expr.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", expr.Resolve().String()) + } + if !right.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String()) + } + + expr = ast.Binary{Left: expr, Op: op, Right: right} + } + + return expr, nil +} + +func (p *Parser) factor() (ast.Expr, error) { + expr, err := p.unary() + if err != nil { + return nil, err + } + + for p.match([]tokens.TokenId{tokens.Div, tokens.Mul, tokens.Mod}) { + op := p.prev() + right, err := p.unary() + if err != nil { + return nil, err + } + + if !expr.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", expr.Resolve().String()) + } + if !right.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String()) + } + + expr = ast.Binary{Left: expr, Op: op, Right: right} + } + + return expr, nil +} + +func (p *Parser) unary() (ast.Expr, error) { + if p.match([]tokens.TokenId{tokens.TestE, tokens.Not, tokens.Neg, tokens.Sub}) { + op := p.prev() + right, err := p.unary() + if err != nil { + return nil, err + } + if (op.Id == tokens.Sub || op.Id == tokens.Neg) && right.Resolve().Value.Id != tokens.TNumber { + return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String()) + } + return ast.Unary{Op: op, Right: right}, nil + } + + return p.call() +} + +func (p *Parser) call() (ast.Expr, error) { + expr, err := p.primary() + if err != nil { + return nil, err + } + + for { + if p.check(tokens.LBracket) { + loc := p.next().Loc + + if expr.Resolve().Value.Id != tokens.TArray { + return nil, errors.NewError(errors.NotIndexable, loc, "value is not indexable") + } + + index, err := p.expr() + if err != nil { + return nil, err + } + if !index.Resolve().Compare(&ast.TypeNumber) { + return nil, errors.NewError(errors.InvalidValue, p.prev().Loc, "invalid index value") + } + + if p.next().Id != tokens.RBracket { + return nil, errors.NewError(errors.ExpectedRBracket, p.prev().Loc, "']' expected to close index") + } + + switch v := expr.(type) { + case ast.Variable: + v.Index = index + expr = v + case ast.GetExpr: + val := v.Expr.(ast.Variable) + val.Index = index + v.Expr = val + expr = v + default: + // shouldn't happen + return nil, errors.NewError(errors.Unexpected, loc, "value is not indexable") + } + continue + } + + if p.check(tokens.Dot) { + loc := p.next().Loc + prop := p.next() + if prop.Id != tokens.Ident { + return nil, errors.NewError(errors.ExpectedIdent, prop.Loc, "expected identifier") + } + sType := expr.Resolve() + if sType.Value.Id != tokens.TStruct { + return nil, errors.NewError(errors.NotStruct, loc, "property get on a non struct value") + } + + sEnv, ok := p.structs[sType.Value.Value] + if !ok { + // shouldn't happen + return nil, errors.NewError(errors.Unexpected, sType.Value.Loc, "undeclared struct", sType.String()) + } + pType, ok := sEnv.Get(prop.Value, true) + if !ok { + return nil, errors.NewError(errors.UndefinedIdent, loc, "property", prop.String(), "not defined in", sType.String()) + } + + expr = ast.GetExpr{Object: expr, Expr: ast.Variable{Name: prop, Type: pType}} + continue + } + + if p.check(tokens.LParen) { + p.next() + args := make([]ast.Expr, 0, 16) + if !p.check(tokens.RParen) { + for { + arg, err := p.expr() + if err != nil { + return nil, err + } + args = append(args, arg) + if !p.check(tokens.Comma) { + break + } + p.next() + } + if !p.check(tokens.RParen) { + return nil, errors.NewError(errors.ExpectedRParen, p.prev().Loc, "')' expected to end function call") + } + } + typ := expr.Resolve() + if typ.Value.Id != tokens.TFunc { + return nil, errors.NewError(errors.NotCallable, p.next().Loc, "value is not callable") + } + + // mostly for println that doesn't have type checks + // and will consume any number of arguments + if typ.Params != nil { + if len(typ.Params) != len(args) { + return nil, errors.NewError(errors.CallError, p.next().Loc, fmt.Sprintf("invalid number of arguments in function call: %d expected %d found", len(typ.Params), len(args))) + } + for n, arg := range args { + if !typ.Params[n].Compare(arg.Resolve()) { + return nil, errors.NewError(errors.TypeMismatch, p.next().Loc, fmt.Sprintf("type mismatch in function call argument %d: expected %s found %s", n+1, typ.Params[n].String(), arg.Resolve().String())) + } + } + } + expr = ast.Call{Callee: expr, Loc: p.next().Loc, Args: args} + continue + } + break + } + return expr, nil +} + +func (p *Parser) primary() (ast.Expr, error) { + if p.match([]tokens.TokenId{tokens.True, tokens.False}) { + return ast.Literal{Value: p.prev(), Type: ast.TypeBool}, nil + } else if p.check(tokens.String) { + return ast.Literal{Value: p.next(), Type: ast.TypeString}, nil + } else if p.check(tokens.Ident) { + value := p.next() + + typc, ok := p.env.Get(value.Value, false) + if !ok { + return nil, errors.NewError(errors.UndefinedIdent, value.Loc, "undeclared variable", value.String()) + } + if typc.Value.Id == tokens.TStruct && !typc.Inst { + return nil, errors.NewError(errors.InvalidValue, value.Loc, value.String(), "is not an instance") + } + + return ast.Variable{Name: value, Type: typc}, nil + } else if p.check(tokens.Char) { + value := p.next() + value.Id = tokens.Number + nval := byte(value.Value[0]) + return ast.Literal{Value: value, Numval: ast.Number(nval), Type: ast.TypeNumber}, nil + } else if p.check(tokens.Number) { + value := p.next() + nval, err := strconv.ParseInt(value.Value, 0, 64) + if err != nil { + return nil, errors.Error{ + Code: errors.FailedNumeric, + Message: fmt.Sprintf("[%s] failed to parse numeric value", value.Loc), + Err: err, + } + } + return ast.Literal{Value: value, Numval: ast.Number(nval), Type: ast.TypeNumber}, nil + } else if p.check(tokens.LParen) { + p.next() + expr, err := p.expr() + if err != nil { + return nil, err + } + next := p.next() + if next.Id != tokens.RParen { + found := fmt.Sprintf("'%v'", next.Value) + if next.Id == tokens.Eof { + found = "end of file" + } + return nil, errors.NewError(errors.ExpectedRParen, next.Loc, "')' expected after expression,", found, "found") + } + return ast.Group{Expr: expr}, nil + } + return nil, errors.NewError(errors.ExpectedExpr, p.curr().Loc, "expression expected") +} diff --git a/parser/parser_test.go b/parser/parser_test.go new file mode 100644 index 0000000..8e97146 --- /dev/null +++ b/parser/parser_test.go @@ -0,0 +1,250 @@ +package parser + +import ( + "strings" + "testing" + + "usebox.net/lang/ast" + "usebox.net/lang/errors" + "usebox.net/lang/tokenizer" +) + +func parse(input string) ([]ast.Expr, error) { + tr := tokenizer.NewTokenizer("-", strings.NewReader(input)) + toks, err := tr.Scan() + if err != nil { + return nil, err + } + p := NewParser(nil) + return p.Parse(toks) +} + +func expectError(t *testing.T, input string, code errors.ErrCode) { + _, errs := parse(input) + if errs == nil { + t.Errorf("expected error and it didn't happen (input: %s)", input) + } + el, ok := errs.(errors.ErrorList) + if !ok { + t.Errorf("error list expected (input: %s)", input) + } else if len(el.Errors) == 0 { + t.Errorf("error list is empty (input: %s)", input) + } else { + if first, ok := el.Errors[0].(errors.Error); !ok { + t.Errorf("error in error list not an Error (input: %s)", input) + } else { + if first.Code != code { + t.Errorf("error %d expected, got %d -> %s (input: %s)", code, first.Code, first, input) + } + } + } +} + +func TestErrorFailedNumeric(t *testing.T) { + expectError(t, "var n number = 18446744073709551616;", errors.FailedNumeric) +} + +func TestErrorExpectedRParen(t *testing.T) { + expectError(t, "var n number = (1 + 2 ;", errors.ExpectedRParen) +} + +func TestErrorExpectedExpr(t *testing.T) { + expectError(t, ";", errors.ExpectedExpr) +} + +func TestErrorExpectedVarName(t *testing.T) { + expectError(t, "var number = 1;", errors.ExpectedIdent) + expectError(t, "const false;", errors.ExpectedIdent) +} + +func TestErrorExpectedType(t *testing.T) { + expectError(t, "var n = 1;", errors.ExpectedType) + expectError(t, "const n = 1;", errors.ExpectedType) + expectError(t, "def fn(n) { }", errors.ExpectedType) + expectError(t, "def fn() { }\nvar a number = fn();", errors.TypeMismatch) +} + +func TestErrorDeclareUndefined(t *testing.T) { + expectError(t, "var a number = a;", errors.UndefinedIdent) +} + +func TestErrorExpectedSemiColon(t *testing.T) { + expectError(t, "var n number = 1", errors.ExpectedSemicolon) + expectError(t, "1 + 1", errors.ExpectedSemicolon) +} + +func TestErrorExpectedFunc(t *testing.T) { + expectError(t, "def ;", errors.ExpectedIdent) + expectError(t, "def fn;", errors.ExpectedLParen) + expectError(t, "def fn(number);", errors.ExpectedIdent) + expectError(t, "def fn(a number, number);", errors.ExpectedIdent) + expectError(t, "def fn(a, b);", errors.ExpectedType) + expectError(t, "def fn(a bool, b);", errors.ExpectedType) + expectError(t, "def fn() id;", errors.ExpectedType) + expectError(t, "def fn(a number) id;", errors.ExpectedType) + expectError(t, "def fn()", errors.ExpectedType) + expectError(t, "def fn() {", errors.ExpectedRBrace) + expectError(t, "def fn() number {", errors.ExpectedRBrace) + expectError(t, "fn(;", errors.UndefinedIdent) + expectError(t, "fn(0;", errors.UndefinedIdent) + expectError(t, "fn()", errors.UndefinedIdent) + expectError(t, "def fn() bool { return 1; }", errors.TypeMismatch) + expectError(t, "def fn() { }\ndef fn() { }", errors.AlreadyDeclared) +} + +func TestErrorReturn(t *testing.T) { + expectError(t, "return", errors.InvalidOperation) + expectError(t, "return true", errors.InvalidOperation) + expectError(t, "def fn() { return }", errors.ExpectedExpr) + expectError(t, "def fn() { return true }", errors.ExpectedSemicolon) + expectError(t, ` + def fn() [5]number { + var local [5]number; + return local; + } + `, errors.InvalidOperation) + expectError(t, ` + def A { var p number; } + def fn() A { + var local A; + return local; + } + `, errors.InvalidOperation) + expectError(t, ` + def A { var p number; } + var g A; + def fn() A { + var local A; + local = g; + return local; + } + "OK"; + `, errors.TypeMismatch) + expectError(t, ` + var g [3]bool; + def fn() [3]bool { + var local [3]bool; + local = g; + return local; + } + "OK"; + `, errors.TypeMismatch) + expectError(t, ` + const a [3]number = [1, 2, 3]; + var b [3]number = a; + `, errors.TypeMismatch) + expectError(t, ` + const a [3]number = [1, 2, 3]; + var b [3]number; + var c [3]number = b; + c = a; + `, errors.TypeMismatch) +} + +func TestErrorExpectedAssign(t *testing.T) { + expectError(t, "a = ", errors.UndefinedIdent) + expectError(t, "1 = 1;", errors.InvalidTarget) + expectError(t, "const a number = 0; a = 1;", errors.AssignConst) + expectError(t, "const a [2]number = [0, 1]; a[0] = 1;", errors.AssignConst) + expectError(t, ` + def fn() { } + const a func () = fn; + a = fn; + `, errors.AssignConst) +} + +func TestErrorLoops(t *testing.T) { + expectError(t, "continue;", errors.InvalidOperation) + expectError(t, "break;", errors.InvalidOperation) +} + +func TestErrorArray(t *testing.T) { + expectError(t, "var a [false]number;", errors.InvalidValue) + expectError(t, "var a [1]number = 1;", errors.TypeMismatch) + expectError(t, "var a []number = [1, 2, 3];", errors.ExpectedExpr) + expectError(t, "var a [2]number; var b [5]number = a;", errors.TypeMismatch) +} + +func TestErrorExprList(t *testing.T) { + expectError(t, "var a number = [1];", errors.InvalidValue) + expectError(t, "var a [1]number = [];", errors.InvalidValue) + expectError(t, "var a [2]number = [1, false];", errors.TypeMismatch) + expectError(t, "var a [2]number = [1, 2, 3];", errors.InvalidValue) + expectError(t, "var a [1]number = [false];", errors.TypeMismatch) + expectError(t, "const a number = [1];", errors.InvalidValue) + expectError(t, "const a [1]number = [];", errors.InvalidValue) + expectError(t, "const a [2]number = [1, false];", errors.TypeMismatch) + expectError(t, "const a [2]number = [1, 2, 3];", errors.InvalidValue) + expectError(t, "const a [1]number = [false];", errors.TypeMismatch) +} + +func TestErrorIndex(t *testing.T) { + expectError(t, "var a [10]number; a[false];", errors.InvalidValue) + expectError(t, "var a [10]number; a[\"bad\"];", errors.InvalidValue) + expectError(t, "var i bool; var a [10]number; a[i];", errors.InvalidValue) + expectError(t, "def fn() { } var a [10]number; a[fn];", errors.InvalidValue) + expectError(t, "def fn() bool { return false; } var a [10]number; a[fn()];", errors.InvalidValue) + expectError(t, "false[0];", errors.NotIndexable) + expectError(t, "10[0];", errors.NotIndexable) + expectError(t, "false[0] = 10;", errors.NotIndexable) +} + +func TestErrorFor(t *testing.T) { + expectError(t, "for a { }", errors.UndefinedIdent) + expectError(t, "for a != 0 { }", errors.UndefinedIdent) + expectError(t, "for a in { }", errors.ExpectedExpr) + expectError(t, "for a in a { }", errors.UndefinedIdent) + expectError(t, "var a number; for i in a { }", errors.TypeMismatch) +} + +func TestErrorStruct(t *testing.T) { + expectError(t, "def A { }", errors.InvalidValue) + expectError(t, "var a number; a.b;", errors.NotStruct) + expectError(t, "def A { var a number; }\nvar a A; a.1;", errors.ExpectedIdent) + expectError(t, "def A { var a number; }\ndef A { }", errors.AlreadyDeclared) + expectError(t, "def A { var a A; }", errors.RecursiveStruct) + expectError(t, "def A { var a number; }\nvar a A;\na.x;", errors.UndefinedIdent) + expectError(t, "def A { var a number; }\nA.a;", errors.InvalidValue) + expectError(t, "def A { var a number; }\nvar a number; a = A;", errors.InvalidValue) + expectError(t, "def A { var a number; }\nvar a A; var b number = a;", errors.TypeMismatch) + expectError(t, "def A { var a number; }\nvar a A; var b a;", errors.ExpectedType) + expectError(t, ` + def A { var a number; } + def B { var a number; } + var a A; + var b B = a; + `, errors.TypeMismatch) + expectError(t, ` + def fn() { + def A { + return; + } + } + `, errors.ExpectedDefinition) + expectError(t, ` + for { + def A { + break; + } + } + `, errors.ExpectedDefinition) + expectError(t, ` + for { + def A { + continue; + } + } + `, errors.ExpectedDefinition) + expectError(t, ` + def A { + var p number; + p = 10; + } + `, errors.ExpectedDefinition) + expectError(t, ` + def A { + var p number; + println("error"); + } + `, errors.ExpectedDefinition) +} diff --git a/tokenizer/tokenizer.go b/tokenizer/tokenizer.go new file mode 100644 index 0000000..78616c6 --- /dev/null +++ b/tokenizer/tokenizer.go @@ -0,0 +1,356 @@ +package tokenizer + +import ( + "bufio" + goerr "errors" + "fmt" + "io" + "strconv" + "strings" + "unicode" + + "usebox.net/lang/errors" + "usebox.net/lang/tokens" +) + +type rTokenIdMap map[rune]tokens.TokenId +type sTokenIdMap map[string]tokens.TokenId + +var ( + keywords = sTokenIdMap{ + "true": tokens.True, + "false": tokens.False, + "var": tokens.Var, + "const": tokens.Const, + "def": tokens.Def, + "return": tokens.Return, + "number": tokens.TNumber, + "bool": tokens.TBool, + "string": tokens.TString, + "func": tokens.TFunc, + "if": tokens.If, + "else": tokens.Else, + "for": tokens.For, + "in": tokens.In, + "continue": tokens.Continue, + "break": tokens.Break, + } + + doubleChar = map[rune]struct { + second rTokenIdMap + }{ + '|': {rTokenIdMap{'|': tokens.Or}}, + '&': {rTokenIdMap{'&': tokens.And}}, + '=': {rTokenIdMap{'=': tokens.Eq}}, + '!': {rTokenIdMap{'=': tokens.Ne, '?': tokens.TagE}}, + '>': {rTokenIdMap{'=': tokens.Ge, '>': tokens.BitShr}}, + '<': {rTokenIdMap{'=': tokens.Le, '<': tokens.BitShl}}, + } + + singleChar = rTokenIdMap{ + '(': tokens.LParen, + ')': tokens.RParen, + '+': tokens.Add, + '-': tokens.Sub, + '*': tokens.Mul, + '%': tokens.Mod, + '/': tokens.Div, + '>': tokens.Gt, + '<': tokens.Lt, + '!': tokens.Not, + '~': tokens.Neg, + '|': tokens.BitOr, + '&': tokens.BitAnd, + '^': tokens.BitXor, + '=': tokens.Assign, + '.': tokens.Dot, + ';': tokens.Semicolon, + ',': tokens.Comma, + '{': tokens.LBrace, + '}': tokens.RBrace, + '[': tokens.LBracket, + ']': tokens.RBracket, + '?': tokens.TestE, + } +) + +type Tokenizer struct { + input bufio.Reader + + loc tokens.Location +} + +func NewTokenizer(filename string, input io.Reader) *Tokenizer { + return &Tokenizer{input: *bufio.NewReader(input), loc: tokens.Location{File: filename, Line: 1, Column: 1}} +} + +func (t *Tokenizer) peek() (rune, error) { + r, _, err := t.input.ReadRune() + if err != nil { + t.input.UnreadRune() + return rune(0), err + } + if err := t.input.UnreadRune(); err != nil { + return rune(0), err + } + return r, nil +} + +func (t *Tokenizer) read() (rune, error) { + r, _, err := t.input.ReadRune() + if err != nil { + return rune(0), err + } + return r, nil +} + +func (t *Tokenizer) unread() error { + return t.input.UnreadRune() +} + +func (t *Tokenizer) skipWhitespace() (rune, error) { + var r rune + var err error +loop: + for { + r, err = t.read() + if err != nil { + break + } + switch { + case !unicode.IsSpace(r): + break loop + case r == '\n': + t.loc.Eol() + default: + t.loc.Inc() + } + } + return r, err +} + +func (t *Tokenizer) extract(filter func(rune) bool) (string, error) { + var buf strings.Builder + for { + r, err := t.peek() + // EOF, error or end of extraction + if err == io.EOF || !filter(r) { + t.loc.Add(buf.Len()) + return buf.String(), nil + } else if err != nil { + return "", err + } else { + t.read() + } + if _, err = buf.WriteRune(r); err != nil { + return "", err + } + } +} + +func (t *Tokenizer) extractEscaped(end rune) (string, error) { + var buf strings.Builder + for { + r, err := t.read() + t.loc.Inc() + if r == end { + return buf.String(), nil + } else if err == io.EOF { + return "", fmt.Errorf("EOF found before closing '%c'", end) + } else if err != nil { + return "", err + } else { + if r == '\\' { + r, err = t.read() + if err == io.EOF { + return "", goerr.New("EOF found before completing escape sequence") + } else if err != nil { + return "", err + } + switch r { + case 'n': + r = '\n' + case 't': + r = '\t' + case '\\', end: + case 'x': + count := 0 + value, err := t.extract(func(r rune) bool { + defer func() { + count++ + }() + return unicode.In(r, unicode.ASCII_Hex_Digit) && count != 2 + }) + if err != nil { + return "", err + } + if len(value) != 2 { + return "", goerr.New("invalid escape sequence") + } + nval, err := strconv.ParseInt(value, 16, 8) + if err != nil { + return "", goerr.New("invalid escape sequence") + } + r = rune(byte(nval)) + default: + return "", goerr.New("invalid escape sequence") + } + t.loc.Inc() + } + } + if _, err = buf.WriteRune(r); err != nil { + return "", err + } + } +} + +func (t *Tokenizer) token() (tokens.Token, error) { + var tok tokens.Token + var r rune + var err error + var loc tokens.Location + + for { + r, err = t.skipWhitespace() + loc = t.loc + if err == io.EOF { + return tokens.Token{Id: tokens.Eof, Loc: loc}, nil + } else if err != nil { + return tok, err + } + + // comments + if r == '/' { + if p, err := t.peek(); err == nil && p == '/' { + for { + r, err = t.read() + if err != nil { + // could be EOF + break + } + if r == '\n' { + t.loc.Eol() + break + } + } + // check for whitespace + continue + } + } + // not a comment, skipped whitespace, + // now process this token + break + } + + // identifier or keywords + if unicode.IsLetter(r) || r == '_' { + t.unread() + value, err := t.extract(func(r rune) bool { + return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' + }) + if err != nil { + return tok, errors.NewErrorWrap(errors.SyntaxError, t.loc, err, err.Error()) + } + // match keywords + if tokenId, ok := keywords[value]; ok { + return tokens.Token{Id: tokenId, Loc: loc, Value: value}, nil + } + // otherwise is an identitier + return tokens.Token{Id: tokens.Ident, Loc: loc, Value: value}, nil + } + + // character literal + if r == '\'' { + t.loc.Inc() + value, err := t.extractEscaped('\'') + if err != nil { + return tok, errors.NewErrorWrap(errors.SyntaxError, t.loc, err, err.Error()) + } + if len(value) != 1 { + return tok, errors.NewError(errors.SyntaxError, t.loc, "invalid character literal") + } + return tokens.Token{Id: tokens.Char, Loc: loc, Value: value}, nil + } + + // numeric literal + if unicode.IsDigit(r) { + t.unread() + pos := 0 + hex := false + bin := false + value, err := t.extract(func(r rune) bool { + if pos == 1 && r == 'x' { + hex = true + pos++ + return true + } + if pos == 1 && r == 'b' { + bin = true + pos++ + return true + } + if hex && unicode.In(r, unicode.ASCII_Hex_Digit) { + pos++ + return true + } + if bin && r != '0' && r != '1' { + return false + } + pos++ + return unicode.IsDigit(r) + }) + if (hex || bin) && len(value) == 2 { + err = fmt.Errorf("invalid numeric format '%s'", value) + } + if err != nil { + return tok, errors.NewErrorWrap(errors.SyntaxError, t.loc, err, err.Error()) + } + return tokens.Token{Id: tokens.Number, Loc: loc, Value: value}, nil + } + + // string literal + if r == '"' { + t.loc.Inc() + value, err := t.extractEscaped('"') + if err != nil { + return tok, errors.NewErrorWrap(errors.SyntaxError, t.loc, err, err.Error()) + } + return tokens.Token{Id: tokens.String, Loc: loc, Value: value}, nil + } + + // two character symbols + if d, ok := doubleChar[r]; ok { + if p, err := t.peek(); err == nil { + if second, ok := d.second[p]; ok { + t.read() + t.loc.Add(2) + // FIXME: value + return tokens.Token{Id: second, Loc: loc, Value: string(r) + string(p)}, nil + } + } + } + + // single character symbols + if tokenId, ok := singleChar[r]; ok { + t.loc.Inc() + return tokens.Token{Id: tokenId, Loc: loc, Value: string(r)}, nil + } + + // invalid token + return tok, errors.NewError(errors.SyntaxError, t.loc, "invalid character:", fmt.Sprintf("'%c'", r)) +} + +func (t *Tokenizer) Scan() ([]tokens.Token, error) { + var ts []tokens.Token + + for { + token, err := t.token() + if err != nil { + return nil, err + } + ts = append(ts, token) + if token.Id == tokens.Eof { + return ts, nil + } + } +} diff --git a/tokenizer/tokenizer_test.go b/tokenizer/tokenizer_test.go new file mode 100644 index 0000000..47afebb --- /dev/null +++ b/tokenizer/tokenizer_test.go @@ -0,0 +1,310 @@ +package tokenizer + +import ( + "strings" + "testing" + + "usebox.net/lang/tokens" +) + +func TestSkipWhitespace(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader(" \t\r\n")) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Eof { + t.Errorf("Eof expected, got %s", tok) + } + if tok.Loc.Line != 2 { + t.Errorf("line == 2 expected, got %s", tok.Loc) + } + if tok.Loc.Column != 1 { + t.Errorf("column == 1 expected, got %s", tok.Loc) + } +} + +func TestComments(t *testing.T) { + for _, tt := range []struct { + name string + input string + line int + col int + }{ + {"single comment", "// a comment\n", 2, 1}, + {"ignore tokens", "// 1 + 2 is ignored\n", 2, 1}, + {"skip whitespace", " // a comment\n", 2, 1}, + {"comment to eof", "\n// comment to eof", 2, 1}, + {"multiple comments", "// comment\n// another comment", 2, 1}, + {"whitespace before comment", "\t// comment with whitespace\n\t// comment\n", 3, 1}, + {"unicode", "// こんにちは\n", 2, 1}, + } { + t.Run(tt.name, func(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader(tt.input)) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Eof { + t.Errorf("Eof expected, got %s", tok) + } + if tok.Loc.Line != tt.line { + t.Errorf("line == %d expected, got %s", tt.line, tok.Loc) + } + if tok.Loc.Column != tt.col { + t.Errorf("column == %d expected, got %s", tt.col, tok.Loc) + } + }) + } +} + +func TestIdent(t *testing.T) { + for _, tt := range []string{ + "ident", + "MyIdent", + "ident2", + "名前", + "__add", + } { + t.Run(tt, func(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader(tt)) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Ident { + t.Errorf("Ident expected, got %s", tok) + } + if tok.Value != tt { + t.Errorf("value == %s expected, got %s", tt, tok.Value) + } + }) + } +} + +func TestMultipleCalls(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader("// comment\nident // with a comment\nanother\n")) + for _, tt := range []string{ + "ident", "another", + } { + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Ident { + t.Errorf("Ident expected, got %s", tok) + } + if tok.Value != tt { + t.Errorf("value == %s expected, got %s", tt, tok.Value) + } + } +} + +func TestKeywords(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader("true false var const def return number bool string func if else for in continue break")) + for _, tt := range []tokens.TokenId{ + tokens.True, + tokens.False, + tokens.Var, + tokens.Const, + tokens.Def, + tokens.Return, + tokens.TNumber, + tokens.TBool, + tokens.TString, + tokens.TFunc, + tokens.If, + tokens.Else, + tokens.For, + tokens.In, + tokens.Continue, + tokens.Break, + } { + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tt { + t.Errorf("%s expected, got %s", tt, tok) + } + } +} + +func TestNumber(t *testing.T) { + for _, tt := range []string{ + "1234", + "0x4d2", + "0b10011010010", + } { + tzr := NewTokenizer("-", strings.NewReader(tt)) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Number { + t.Errorf("Number expected, got %s", tok) + } + if tok.Value != tt { + t.Errorf("value == %s expected, got '%s'", tt, tok.Value) + } + } +} + +func TestCharacter(t *testing.T) { + for tt, e := range map[string]string{ + "'a'": "a", + "'0'": "0", + "'\\''": "'", + "' '": " ", + "'\\n'": "\n", + "'\\x0d'": "\r", + } { + tzr := NewTokenizer("-", strings.NewReader(tt)) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.Char { + t.Errorf("Number expected, got %s", tok) + } + if tok.Value != e { + t.Errorf("value == %s expected, got '%s'", e, tok.Value) + } + } +} + +func TestErrorCharacter(t *testing.T) { + for _, tt := range []string{ + "'12'", + "''", + "'\\'", + "'A", + "'世'", + "'\\x0'", + } { + tzr := NewTokenizer("-", strings.NewReader(tt)) + _, err := tzr.token() + if err == nil { + t.Errorf("expected error, didn't happen (input: %s)", tt) + } + } +} + +func TestString(t *testing.T) { + for tt, v := range map[string]string{ + "\"this is a string\"": "this is a string", + "\"0.1234\"": "0.1234", + "\"\\\"escaped\\\" string\"": "\"escaped\" string", + "\"\\n\\x0d\\t\"": "\n\r\t", + "\"Multiline\\nstring\"": "Multiline\nstring", + } { + t.Run(tt, func(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader(tt)) + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tokens.String { + t.Errorf("String expected, got \"%s\"", tok) + } + if tok.Value != v { + t.Errorf("value == %s expected, got \"%s\"", tt, tok.Value) + } + }) + } +} + +func TestErrorStrnig(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader("\"string and EOF")) + _, err := tzr.token() + if err == nil { + t.Errorf("expected error, didn't happen") + } +} + +func TestSingleChar(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader("{ } ( ) [ ] ; , + - * % / . = > < ! ~ | & ^ ?")) + for _, tt := range []tokens.TokenId{ + tokens.LBrace, + tokens.RBrace, + tokens.LParen, + tokens.RParen, + tokens.LBracket, + tokens.RBracket, + tokens.Semicolon, + tokens.Comma, + tokens.Add, + tokens.Sub, + tokens.Mul, + tokens.Mod, + tokens.Div, + tokens.Dot, + tokens.Assign, + tokens.Gt, + tokens.Lt, + tokens.Not, + tokens.Neg, + tokens.BitOr, + tokens.BitAnd, + tokens.BitXor, + tokens.TestE, + } { + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tt { + t.Errorf("%s expected, got %s", tt, tok) + } + } +} + +func TestDoubleChar(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader("|| && == != >= <= >> << !?")) + for _, tt := range []tokens.TokenId{ + tokens.Or, + tokens.And, + tokens.Eq, + tokens.Ne, + tokens.Ge, + tokens.Le, + tokens.BitShr, + tokens.BitShl, + tokens.TagE, + } { + tok, err := tzr.token() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if tok.Id != tt { + t.Errorf("%s expected, got %s", tt, tok) + } + } +} + +func TestScan(t *testing.T) { + for _, tt := range []struct { + name string + input string + ntokens int + }{ + {"single line", "1 + 2", 4}, + {"multiple lines", "1 + 2\nident", 5}, + {"line starts with whitespace", "1 + 2\n\tident", 5}, + } { + t.Run(tt.name, func(t *testing.T) { + tzr := NewTokenizer("-", strings.NewReader(tt.input)) + ts, err := tzr.Scan() + if err != nil { + t.Errorf("unexpected error %s", err) + } + if len(ts) != tt.ntokens { + t.Errorf("%d tokens expected, got %d", tt.ntokens, len(ts)) + } + last := ts[len(ts)-1] + if last.Id != tokens.Eof { + t.Errorf("last token expected to be Eof, got %s", last) + } + }) + } +} diff --git a/tokens/tokens.go b/tokens/tokens.go new file mode 100644 index 0000000..e063730 --- /dev/null +++ b/tokens/tokens.go @@ -0,0 +1,186 @@ +package tokens + +import ( + "encoding/json" + "fmt" +) + +type TokenId int + +func (t TokenId) String() string { + return tokenStr[t] +} + +func (t TokenId) MarshalJSON() ([]byte, error) { + return json.Marshal(t.String()) +} + +const ( + Eof TokenId = iota + Ident + + None + True + False + Var + Const + Def + TNumber + TBool + TString + TFunc + TStruct + TArray + Return + If + Else + For + In + Continue + Break + + Number + Char + String + + Or + And + Eq + Ne + Ge + Le + BitShr + BitShl + TagE + + LBrace + RBrace + LParen + RParen + LBracket + RBracket + Semicolon + Comma + Add + Sub + Mul + Mod + Div + Neg + Dot + Assign + Gt + Lt + Not + BitOr + BitAnd + BitXor + TestE +) + +var ( + tokenStr = []string{ + "Eof", + "Ident", + + // keywords + "none", + "True", + "False", + "Var", + "Const", + "Def", + "number", + "bool", + "string", + "func", + "struct", + "array", + "Return", + "If", + "Else", + "For", + "In", + "Continue", + "Break", + + // literals + "Number", + "Character", + "String", + + // double char + "Or", + "And", + "Eq", + "NE", + "Ge", + "Le", + "BitShr", + "BitShl", + "TagE", + + // single character + "LBrace", + "RBrace", + "LParen", + "RParen", + "LBracket", + "RBracket", + "Semicolon", + "Comma", + "Add", + "Sub", + "Mul", + "Mod", + "Div", + "Neg", + "Dot", + "Assign", + "Gt", + "Lt", + "Not", + "BitOr", + "BitAnd", + "BitXor", + "TestE", + } +) + +type Location struct { + File string `json:"file"` + Line int `json:"line"` + Column int `json:"col"` +} + +func (l Location) String() string { + return fmt.Sprintf("%s:%d:%d", l.File, l.Line, l.Column) +} + +func (l *Location) Inc() { + l.Column++ +} + +func (l *Location) Add(cols int) { + l.Column += cols +} + +func (l *Location) Eol() { + l.Line++ + l.Column = 1 +} + +type Token struct { + Id TokenId `json:"id"` + Loc Location `json:"loc"` + Value string `json:"val,omitempty"` +} + +func (t Token) String() string { + switch t.Id { + case Ident: + return fmt.Sprintf("'%s'", t.Value) + default: + return fmt.Sprintf("%s at %s", t.Id, t.Loc) + } +} -- cgit v1.2.3