aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJuan J. Martinez <jjm@usebox.net>2022-07-18 07:45:58 +0100
committerJuan J. Martinez <jjm@usebox.net>2022-07-18 07:45:58 +0100
commit8bb321f8b032dfaeffbe3d1b8dfeb215c12d3642 (patch)
treec53977d1284347bb1d5963ddb4dc7723c40c6e55
downloadmicro-lang-8bb321f8b032dfaeffbe3d1b8dfeb215c12d3642.tar.gz
micro-lang-8bb321f8b032dfaeffbe3d1b8dfeb215c12d3642.zip
First public release
-rw-r--r--.gitignore6
-rw-r--r--COPYING21
-rw-r--r--Makefile38
-rw-r--r--README.md58
-rw-r--r--TODO.md90
-rw-r--r--ast/ast.go454
-rw-r--r--cmd/micro/micro.go202
-rw-r--r--docs/tour.md488
-rw-r--r--errors/errors.go106
-rw-r--r--go.mod3
-rw-r--r--grammar.md65
-rw-r--r--interpreter/callable.go227
-rw-r--r--interpreter/interpreter.go801
-rw-r--r--interpreter/interpreter_test.go754
-rw-r--r--micro-vim/ftdetect/micro.vim3
-rw-r--r--micro-vim/ftplugin/micro.vim26
-rw-r--r--micro-vim/syntax/micro.vim90
-rw-r--r--parser/parser.go1168
-rw-r--r--parser/parser_test.go250
-rw-r--r--tokenizer/tokenizer.go356
-rw-r--r--tokenizer/tokenizer_test.go310
-rw-r--r--tokens/tokens.go186
22 files changed, 5702 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..04863c4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+*.swp
+*~
+*.out
+*.test
+bin/
+input.micro
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..3271d4a
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,21 @@
+Micro, a toy programming language
+Copyright (C) 2022 by Juan J. Martinez <jjm@usebox.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..74532be
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,38 @@
+MAIN := cmd/micro/micro.go
+NAME := micro
+GO_SOURCES := $(shell find . -name '*.go')
+VERSION_STRING := $(shell git describe --exact-match --tags --always HEAD 2>/dev/null || echo git-`git rev-parse --short=6 HEAD 2>/dev/null` || echo unknown)
+GOBUILD_FLAGS := -ldflags "-X main.Version=$(VERSION_STRING)" -mod=mod
+PACKAGES := $(shell go list ./...)
+
+.PHONY: clean run parse bindir
+
+build: bin/$(NAME)
+
+build-win: bin/$(NAME).exe
+
+run: build
+ bin/$(NAME) input.micro
+
+parse: build
+ bin/$(NAME) -parse input.micro
+
+vet:
+ go vet $(PACKAGES)
+
+bin/$(NAME): bindir $(GO_SOURCES)
+ GOBIN="$(CURDIR)/bin" go build -o bin/$(NAME) $(GOBUILD_FLAGS) $(MAIN)
+
+bin/$(NAME).exe: bindir $(GO_SOURCES)
+ GOOS=windows GOARCH=386 CGO_ENABLED=1 CXX=i686-w64-mingw32-g++ CC=i686-w64-mingw32-gcc GOBIN="$(CURDIR)/bin" go build $(GOBUILD_FLAGS) -o bin/$(NAME).exe $(MAIN)
+
+GO_TEST_FLAGS ?= ""
+test:
+ go test ./... -v $(GO_TEST_FLAGS)
+
+bindir:
+ mkdir -p ./bin
+
+clean:
+ rm -f bin/$(NAME) bin/$(NAME).exe
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..14b1170
--- /dev/null
+++ b/README.md
@@ -0,0 +1,58 @@
+# Micro Programming Language
+
+Micro is a small statically typed toy programming language.
+
+Objectives:
+
+* Learn and have fun
+* Build an interpreter
+* (may be) Build a cross-compiler targeting 8-bit microcomputers (Z80 CPU)
+ * Easy to interact with existing libraries written in other languages
+* Fast compilation (compiler) / start-up time (interpreter)
+* Micro should be statically typed, small but useful, and reasonably performant (including on the target CPU)
+
+## Current status
+
+There is a tree-walk interpreter built in Go mostly following the most excellent [Crafting Interpreters](https://craftinginterpreters.com/) book. It isn't fast, but it can be useful to test code and play around.
+
+A compiler targeting the Z80 CPU was planned, and that is influencing what is available on the interpreter. Unfortunately this has proven to be more than I can currently tackle, so I'm releasing the interpreter "as is".
+
+There's a plugin to provide syntax highlighting in vim. The proposed extension for Micro source code is `.micro` or `.cro`.
+
+### Running the interpreter
+
+Run a script with `micro file.micro`, or start the REPL with `micro`:
+```
+Welcome to Micro <VERSION>
+Type in expressions for evaluation, or :q to exit.
+
+micro> println("Hello micro!");
+Hello micro!
+13
+micro> :q
+```
+
+Also run `micro -h` for the CLI help.
+
+An example of a program:
+
+```micro
+def fib(n number) number {
+ if n < 2 {
+ return n;
+ } else {
+ return fib(n - 1) + fib(n - 2);
+ }
+}
+
+println(fib(20));
+// output: 6765
+```
+Check [the tour](docs/tour.md) for features and details about the language.
+
+You can also read [the grammar](grammar.md).
+
+## Copying
+
+This software is distributed under MIT license, unless stated otherwise.
+
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..9203907
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,90 @@
+# Numeric types
+
+Instead of "number", integer:
+
+ * i8, i16: signed
+ * u8, u16: unsigned
+
+Operations:
+
+ * x.lo and x.hi are r/w properties to get/set low/high bytes in 16-bit numbers
+ * i8(x), u8(x), i16(x), u16(x) for conversions (there are no implicit conversions)
+
+Consequences:
+
+ * array index is either u8 or u16
+
+# Strings
+
+Strings are zero ended arrays of u8 that can be represented as a literal using double quotes.
+
+```micro
+// 6 characters plus a 0 at the end
+var s [7]u8 = "string";
+
+// change value; length <= array size
+s = "value";
+
+s = "another";
+// error: string is too long for [7]u8
+```
+
+Notes:
+
+ * "xxx" + 0 to initialize an array of u8
+ * can be used to change the value
+ * the string can be shorter than the array space, but length + 1 must fit in the array
+
+The interpreter built-in `println` will print literals as strings, but in order to output an array of u8 as a string, use `str` property:
+
+The following escape sequences are supported in both strings and character literals:
+
+ * In string literals `\"` for a double quote.
+ * In character literals `\'` for a single quote.
+ * `\n` for end of line.
+ * `\t` for tab.
+ * `\x00` for a 2 digits hex number (8-bit).
+ * `\\` for the backslash itself.
+
+```micro
+// the array is initialized to zeroes
+var s [10]u8;
+
+s[0] = 'H';
+s[1] = 'e';
+s[2] = 'l';
+s[3] = 'l';
+s[4] = 'o';
+
+println(s.str);
+// output: Hello
+```
+
+# Constants and expression evaluation
+
+The parser should resolve most constants and expression evaluation.
+
+e.g. `a = 1 + 2` -> `a = 3`
+
+Constant as literal replacement (constants MUST be literals after expression evaluation).
+
+Built-in `len` should be resolved at parsing time (applies to array size and that is static).
+
+# Remove closures
+
+ * why: they are dynamic
+ * can we have a limited closure?
+
+# Directives
+
+Include a file in current source and it will interpreted / compiled.
+
+```micro
+include "file.micro"
+```
+
+Include a binary file as an expression list to initialize an array.
+
+```micro
+var arr [64]number = incbin "file.micro";
+```
diff --git a/ast/ast.go b/ast/ast.go
new file mode 100644
index 0000000..a654e4b
--- /dev/null
+++ b/ast/ast.go
@@ -0,0 +1,454 @@
+package ast
+
+import (
+ "encoding/json"
+ "fmt"
+ "strings"
+
+ "usebox.net/lang/tokens"
+)
+
+// for JSON
+type node struct {
+ Type string `json:"type"`
+ Object any `json:"object"`
+}
+
+var (
+ TypeNumber = Type{Value: tokens.Token{Id: tokens.TNumber}, Const: true}
+ TypeBool = Type{Value: tokens.Token{Id: tokens.TBool}, Const: true}
+ TypeString = Type{Value: tokens.Token{Id: tokens.TString}, Const: true}
+ TypeArray = Type{Value: tokens.Token{Id: tokens.TArray}, Const: true}
+ TypeNone = Type{Value: tokens.Token{Id: tokens.None}, Const: true}
+)
+
+type Type struct {
+ Value tokens.Token `json:"value"`
+ Params []Type `json:"params,omitempty"`
+ Ret *Type `json:"type,omitempty"`
+ Len *int `json:"len,omitempty"`
+ Const bool `json:"const"`
+ Inst bool `json:"-"`
+ Ref bool `json:"-"`
+}
+
+func NewStructType(name tokens.Token) *Type {
+ return &Type{Value: tokens.Token{Id: tokens.TStruct, Value: name.Value, Loc: name.Loc}}
+}
+
+func NewFuncType(name tokens.Token, params []Type, ret *Type) *Type {
+ return &Type{Value: tokens.Token{Id: tokens.TFunc, Loc: name.Loc}, Params: params, Ret: ret}
+}
+
+func (t Type) MarshalJSON() ([]byte, error) {
+ type Object Type
+ return json.MarshalIndent(node{"type", Object(t)}, "", " ")
+}
+
+func (t Type) String() string {
+ if t.Value.Id == tokens.TFunc {
+ var b = strings.Builder{}
+ b.WriteString(t.Value.Id.String())
+ if t.Params == nil {
+ // not specified (e.g. println)
+ b.WriteString(" (...)")
+ } else if len(t.Params) > 0 {
+ b.WriteString(" (")
+ for n, param := range t.Params {
+ b.WriteString(param.String())
+ if n < len(t.Params)-1 {
+ b.WriteString(", ")
+ }
+ }
+ b.WriteString(")")
+ } else {
+ b.WriteString(" ()")
+ }
+
+ if t.Ret != nil {
+ b.WriteString(" ")
+ b.WriteString(t.Ret.String())
+ }
+
+ return b.String()
+ }
+
+ if t.Value.Id == tokens.TStruct {
+ return fmt.Sprintf("struct %s", t.Value.Value)
+ }
+
+ if t.Value.Id == tokens.TArray {
+ if t.Ret != nil {
+ return fmt.Sprintf("array [%d]%s", *t.Len, t.Ret.String())
+ }
+ return "array"
+ }
+
+ return t.Value.Id.String()
+}
+
+func (t Type) Compare(other *Type) bool {
+ if other == nil {
+ return false
+ }
+ if t.Value.Id != other.Value.Id {
+ return false
+ }
+
+ if t.Value.Id == tokens.TArray {
+ // this is len accepting any length of array
+ if t.Len == nil || other.Len == nil {
+ return true
+ }
+ return *t.Len == *other.Len
+ }
+
+ if t.Value.Id == tokens.TStruct {
+ return t.Value.Value == other.Value.Value
+ }
+
+ if t.Value.Id == tokens.TFunc {
+ // func checks
+ if len(t.Params) != len(other.Params) {
+ return false
+ }
+ for i := range t.Params {
+ if !t.Params[i].Compare(&other.Params[i]) {
+ return false
+ }
+ }
+ if t.Ret != nil {
+ return t.Ret.Compare(other.Ret)
+ }
+ return t.Ret == other.Ret
+ }
+
+ return true
+}
+
+func (t Type) ResolveConst() bool {
+ if t.Value.Id == tokens.TArray {
+ return t.Ret.Const
+ }
+ return t.Const
+}
+
+type Expr interface {
+ Resolve() *Type
+}
+
+type ExprList struct {
+ Exprs []Expr `json:"exprs"`
+}
+
+func (n ExprList) MarshalJSON() ([]byte, error) {
+ type Object ExprList
+ return json.MarshalIndent(node{"exprlist", Object(n)}, "", " ")
+}
+
+func (n ExprList) Resolve() *Type {
+ return n.Exprs[0].Resolve()
+}
+
+type Var struct {
+ Name tokens.Token `json:"ident"`
+ Type Type `json:"type"`
+ Initv Expr `json:"initval,omitempty"`
+}
+
+func (n Var) MarshalJSON() ([]byte, error) {
+ type Object Var
+ return json.MarshalIndent(node{"var", Object(n)}, "", " ")
+}
+
+func (n Var) Resolve() *Type {
+ return &n.Type
+}
+
+type Variable struct {
+ Name tokens.Token `json:"ident"`
+ Index Expr `json:"index,omitempty"`
+ Type Type `json:"-"`
+}
+
+func (n Variable) MarshalJSON() ([]byte, error) {
+ type Object Variable
+ return json.MarshalIndent(node{"variable", Object(n)}, "", " ")
+}
+
+func (n Variable) Resolve() *Type {
+ if n.Type.Value.Id == tokens.TArray && n.Index != nil {
+ return n.Type.Ret
+ }
+ return &n.Type
+}
+
+func (n Variable) String() string {
+ if n.Index != nil {
+ return fmt.Sprintf("%s[%s]", n.Name.Value, n.Index.Resolve().Value)
+ }
+ if n.Type.Value.Id == tokens.TStruct {
+ return fmt.Sprintf("<struct %s>.%s", n.Type.Value.Value, n.Name.Value)
+ }
+ return n.Name.String()
+}
+
+type Assign struct {
+ Left Expr `json:"left"`
+ Right Expr `json:"right"`
+ Loc tokens.Location `json:"-"`
+}
+
+func (n Assign) MarshalJSON() ([]byte, error) {
+ type Object Assign
+ return json.MarshalIndent(node{"variable", Object(n)}, "", " ")
+}
+
+func (n Assign) Resolve() *Type {
+ return n.Left.Resolve()
+}
+
+type Binary struct {
+ Left Expr `json:"left"`
+ Op tokens.Token `json:"op"`
+ Right Expr `json:"right"`
+ Type *Type
+}
+
+func (n Binary) MarshalJSON() ([]byte, error) {
+ type Object Binary
+ return json.MarshalIndent(node{"binary", Object(n)}, "", " ")
+}
+
+func (n Binary) Resolve() *Type {
+ if n.Type != nil {
+ return n.Type
+ } else {
+ return n.Left.Resolve()
+ }
+}
+
+type Unary struct {
+ Op tokens.Token `json:"op"`
+ Right Expr `json:"expr"`
+}
+
+func (n Unary) MarshalJSON() ([]byte, error) {
+ type Object Unary
+ return json.MarshalIndent(node{"unary", Object(n)}, "", " ")
+}
+
+func (n Unary) Resolve() *Type {
+ if n.Op.Id == tokens.TestE {
+ return &TypeBool
+ }
+ return n.Right.Resolve()
+}
+
+type Number int64
+
+func (n Number) String() string {
+ return fmt.Sprintf("%d", n)
+}
+
+func (n Number) Resolve() *Type {
+ return &TypeNumber
+}
+
+type Literal struct {
+ Value tokens.Token `json:"value"`
+ Numval Number `json:"numval,omitempty"`
+ Type Type `json:"-"`
+}
+
+func (n Literal) MarshalJSON() ([]byte, error) {
+ type Object Literal
+ return json.MarshalIndent(node{"literal", Object(n)}, "", " ")
+}
+
+func (n Literal) Resolve() *Type {
+ return &n.Type
+}
+
+type Group struct {
+ Expr Expr `json:"expr"`
+}
+
+func (n Group) MarshalJSON() ([]byte, error) {
+ type Object Group
+ return json.MarshalIndent(node{"group", Object(n)}, "", " ")
+}
+
+func (n Group) Resolve() *Type {
+ return n.Expr.Resolve()
+}
+
+type Block struct {
+ Stmts []Expr `json:"stmts"`
+}
+
+func (n Block) MarshalJSON() ([]byte, error) {
+ type Object Block
+ return json.MarshalIndent(node{"block", Object(n)}, "", " ")
+}
+
+func (n Block) Resolve() *Type {
+ // not an expression
+ return &TypeNone
+}
+
+type For struct {
+ Cond Expr `json:"cond,omitempty"`
+ Stmts Expr `json:"stmts"`
+}
+
+func (n For) MarshalJSON() ([]byte, error) {
+ type Object For
+ return json.MarshalIndent(node{"for", Object(n)}, "", " ")
+}
+
+func (n For) Resolve() *Type {
+ // not an expression
+ return &TypeNone
+}
+
+type ForIn struct {
+ Name tokens.Token `json:"ident"`
+ Expr Expr `json:"expr"`
+ Stmts Expr `json:"stmts"`
+}
+
+func (n ForIn) MarshalJSON() ([]byte, error) {
+ type Object ForIn
+ return json.MarshalIndent(node{"forin", Object(n)}, "", " ")
+}
+
+func (n ForIn) Resolve() *Type {
+ // not an expression
+ return &TypeNone
+}
+
+type IfElse struct {
+ Cond Expr `json:"cond"`
+ True Expr `json:"true"`
+ False Expr `json:"false"`
+}
+
+func (n IfElse) MarshalJSON() ([]byte, error) {
+ type Object IfElse
+ return json.MarshalIndent(node{"ifelse", Object(n)}, "", " ")
+}
+
+func (n IfElse) Resolve() *Type {
+ // not an expression
+ return &TypeNone
+}
+
+type Call struct {
+ Callee Expr `json:"callee"`
+ Loc tokens.Location `json:"-"`
+ Args []Expr `json:"args"`
+}
+
+func (n Call) MarshalJSON() ([]byte, error) {
+ type Object Call
+ return json.MarshalIndent(node{"call", Object(n)}, "", " ")
+}
+
+func (n Call) Resolve() *Type {
+ typ := n.Callee.Resolve().Ret
+ if typ == nil {
+ typ = &TypeNone
+ }
+ return typ
+}
+
+type Struct struct {
+ Name tokens.Token `json:"ident"`
+ Body Block `json:"body"`
+}
+
+func (n Struct) MarshalJSON() ([]byte, error) {
+ type Object Struct
+ return json.MarshalIndent(node{"struct", Object(n)}, "", " ")
+}
+
+func (n Struct) Resolve() *Type {
+ return NewStructType(n.Name)
+}
+
+type GetExpr struct {
+ Object Expr `json:"object"`
+ Expr Expr `json:"expr"`
+}
+
+func (n GetExpr) MarshalJSON() ([]byte, error) {
+ type Object GetExpr
+ return json.MarshalIndent(node{"get", Object(n)}, "", " ")
+}
+
+func (n GetExpr) Resolve() *Type {
+ // this is likely to be wrong
+ return n.Expr.Resolve()
+}
+
+type Func struct {
+ Name tokens.Token `json:"ident"`
+ Params []Var `json:"params"`
+ Ret *Type `json:"ret"`
+ Body Block `json:"body"`
+}
+
+func (n Func) MarshalJSON() ([]byte, error) {
+ type Object Func
+ return json.MarshalIndent(node{"func", Object(n)}, "", " ")
+}
+
+func (n Func) Resolve() *Type {
+ if n.Ret == nil {
+ return &TypeNone
+ } else {
+ return n.Ret
+ }
+}
+
+type Return struct {
+ Value Expr `json:"expr"`
+ Loc tokens.Location `json:"-"`
+ Error bool `json:"error"`
+}
+
+func (n Return) MarshalJSON() ([]byte, error) {
+ type Object Return
+ return json.MarshalIndent(node{"return", Object(n)}, "", " ")
+}
+
+func (n Return) Resolve() *Type {
+ return n.Value.Resolve()
+}
+
+type Continue struct {
+ Loc tokens.Location `json:"-"`
+}
+
+func (n Continue) MarshalJSON() ([]byte, error) {
+ type Object Continue
+ return json.MarshalIndent(node{"continue", Object(n)}, "", " ")
+}
+
+func (n Continue) Resolve() *Type {
+ return &TypeNone
+}
+
+type Break struct {
+ Loc tokens.Location `json:"-"`
+}
+
+func (n Break) MarshalJSON() ([]byte, error) {
+ type Object Break
+ return json.MarshalIndent(node{"break", Object(n)}, "", " ")
+}
+
+func (n Break) Resolve() *Type {
+ return &TypeNone
+}
diff --git a/cmd/micro/micro.go b/cmd/micro/micro.go
new file mode 100644
index 0000000..3e5c9e6
--- /dev/null
+++ b/cmd/micro/micro.go
@@ -0,0 +1,202 @@
+package main
+
+import (
+ "bufio"
+ "encoding/json"
+ "errors"
+ "flag"
+ "fmt"
+ "os"
+ "path"
+ "strings"
+ "time"
+
+ "usebox.net/lang/interpreter"
+ "usebox.net/lang/parser"
+ "usebox.net/lang/tokenizer"
+)
+
+const name = "micro"
+
+var Version = "<unset>"
+
+func perror(err error) {
+ fmt.Fprintln(os.Stderr, err)
+
+ first := err
+ for {
+ perr := err
+ err = errors.Unwrap(err)
+ if err == nil {
+ if perr.Error() != first.Error() {
+ fmt.Fprintf(os.Stderr, "%s\n", perr)
+ }
+ break
+ }
+ }
+}
+
+func fatal(errno int, err error) {
+ perror(err)
+ os.Exit(errno)
+}
+
+func fatals(typ string, errno int, err error) {
+ fmt.Fprintf(os.Stderr, "%s: %s\n", name, typ)
+ fatal(errno, err)
+}
+
+func repl() {
+ scanner := bufio.NewScanner(os.Stdin)
+ p := parser.NewParser(interpreter.BuiltInTypes())
+ i := interpreter.NewInterpreter()
+
+ fmt.Printf("Welcome to %s %s\nType in expressions for evaluation, or :q to exit.\n\n", strings.Title(name), Version)
+
+ repl := 0
+ for {
+ fmt.Printf("%s> ", name)
+ var line string
+ multiline := false
+ for {
+ if multiline {
+ fmt.Print(" | ")
+ line += " "
+ }
+ if !scanner.Scan() {
+ fmt.Println()
+ return
+ }
+ r := scanner.Text()
+ if r == ":q" {
+ return
+ }
+ line += r
+
+ if !multiline {
+ if r == "" {
+ continue
+ }
+ if r[len(r)-1] != ';' {
+ multiline = true
+ continue
+ }
+ break
+ }
+ if r == "" {
+ break
+ }
+ }
+
+ in := strings.NewReader(line)
+ repl++
+ t := tokenizer.NewTokenizer(fmt.Sprintf("in%d", repl), in)
+ ts, err := t.Scan()
+ if err != nil {
+ perror(err)
+ continue
+ }
+
+ tree, err := p.Parse(ts)
+ if err != nil {
+ perror(err)
+ continue
+ }
+
+ for _, expr := range tree {
+ v, err := i.Interp(expr)
+ if err != nil {
+ perror(err)
+ break
+ }
+ if v != nil {
+ fmt.Println(v)
+ }
+ }
+ }
+}
+
+func usage() {
+ me := path.Base(os.Args[0])
+ fmt.Fprintf(os.Stderr,
+ "Usage: %s [options] ... [file]\nOptions:\n", me)
+ flag.PrintDefaults()
+ fmt.Fprintln(os.Stderr)
+}
+
+func main() {
+ fVersion := flag.Bool("V", false, "display version and exit")
+ fParse := flag.Bool("parse", false, "parse reporting any errors and exit")
+ fDebugTokens := flag.Bool("tokens", false, "dump to stderr tokens and exit")
+ fDebugAst := flag.Bool("ast", false, "dump to stderr AST and exit")
+ fTime := flag.Bool("time", false, "measure execution time")
+ flag.Usage = usage
+ flag.Parse()
+
+ if *fVersion {
+ fmt.Printf("%s %s\n", name, Version)
+ os.Exit(0)
+ }
+
+ var input *os.File
+ var filename = flag.Arg(0)
+
+ if filename == "" {
+ repl()
+ os.Exit(0)
+ } else {
+ var err error
+ input, err = os.Open(filename)
+ if err != nil {
+ fatals("error opening input file", 1, err)
+ }
+ }
+ t := tokenizer.NewTokenizer(filename, input)
+ ts, err := t.Scan()
+ if err != nil {
+ fatals("parsing error", 10, err)
+ }
+
+ if *fDebugTokens {
+ out, err := json.MarshalIndent(ts, "", " ")
+ if err != nil {
+ fatals("encoding error", 11, err)
+ }
+ fmt.Fprintf(os.Stderr, "%s\n", string(out))
+ os.Exit(0)
+ }
+
+ p := parser.NewParser(interpreter.BuiltInTypes())
+ tree, err := p.Parse(ts)
+ if *fParse {
+ if err != nil {
+ perror(err)
+ os.Exit(1)
+ }
+ os.Exit(0)
+ }
+ if err != nil {
+ fatal(20, err)
+ }
+
+ if *fDebugAst {
+ out, err := json.MarshalIndent(tree, "", " ")
+ if err != nil {
+ fatals("encoding error", 21, err)
+ }
+ fmt.Fprintf(os.Stderr, "%s\n", string(out))
+ os.Exit(0)
+ }
+
+ start := time.Now()
+ i := interpreter.NewInterpreter()
+ for _, expr := range tree {
+ if _, err := i.Interp(expr); err != nil {
+ fatal(31, err)
+ }
+ }
+
+ if *fTime {
+ fmt.Fprintf(os.Stderr, "elapsed: %f\n", time.Since(start).Seconds())
+ }
+}
diff --git a/docs/tour.md b/docs/tour.md
new file mode 100644
index 0000000..27c7e14
--- /dev/null
+++ b/docs/tour.md
@@ -0,0 +1,488 @@
+# A tour of Micro
+
+Micro is a small statically typed toy programming language.
+
+## Syntax
+
+Micro statements are:
+
+* Variable declarations and expressions, delimited by semicolons.
+* Function and structure definitions.
+
+### Comments
+
+Only line comments, starting with `//` and ending at the end of the line.
+
+```micro
+// This is a comment
+```
+
+### Reserved words
+
+These are the reserved words in micro:
+
+> var const def func bool true false return if else for in break continue
+
+See also the built-in functions.
+
+### Identifiers
+
+Identifiers can be any string starting with a later (upper or lower case) or underscore (`_`), followed by the same type of character or a numeric value.
+
+An identifier can't be a reserved word.
+
+### Blocks and scope
+
+Micro has series of statements in blocks delimited with `{` and `}`, and a block defines a lexical scope.
+
+A variable declaration can shadow another from a parent scope.
+
+```micro
+var a number = 1;
+println(a);
+
+{
+ // block, new scope
+ var a string = "one";
+ println(a);
+}
+
+println(a);
+
+// output:
+// 1
+// one
+// 1
+```
+
+### Operators
+
+Micro uses the same operators and almost the same operator precedence as C, from more to less priority:
+
+```micro
+? ! - ~
+/ * %
+&
+- +
+<< >> | ^
+> >= < <=
+|| &&
+!= ==
+=
+```
+
+Arithmetic operators only apply to integers.
+
+Comparison operators only apply to integers, with the exception of `==` and `!=` that also apply to other types (if both operands are of the same type).
+
+Logic operators apply to booleans and to the following:
+
+* An empty string is false, true otherwise.
+* A numeric value of 0 is false, true otherwise.
+* A function reference is always true.
+
+The result of these operators is a boolean and they are lazy operators (e.g. in `a && b`, `b` won't be evaluated if `a` is false).
+
+## Values
+
+| Type | Keyword | Sample |
+| --- | --- | --- |
+| Boolean | `bool` | `true; false; ` |
+| Number | `number` (int64) | `123;` |
+| String | `string` | `"example";` |
+
+There is also a "none" type referred when a function doesn't return a value. It is not a available to use but it will appear in error reporting.
+
+For example, the interpreter running this code:
+```micro
+def fn() { }
+
+var a number = fn();
+// error: type mismatch expected number found none
+```
+
+See functions for more information about the "func" type.
+
+Number supports the following formats:
+
+ * 0 decimal
+ * 0x00 hexadecimal
+ * 0b00 binary
+ * 'x' for "ASCII character"
+
+## Variables
+
+Variables are declared with `var`.
+
+If they are not initialized, a default value is assigned; with the exception of functions, that can't be used until they have a value (or there will be a runtime error).
+
+```micro
+var a number = 123;
+var b number; // initialises default to 0
+var c string; // empty string
+var d bool; // false
+
+// e was not declared: error
+e = 123;
+
+var fn func ();
+fn();
+// error: value is not callable
+```
+
+### Constants
+
+Constants are immutable values.
+
+Constants can be literals or expressions that get evaluated at compilation time.
+
+```micro
+const K number = 10;
+
+var a number = K;
+
+K = 0;
+// error: value cannot be assigned to constant K
+```
+
+## Conditionals and flow control
+
+### If-else
+
+```micro
+var a number;
+
+if a > 0 {
+ println("it is true");
+} else {
+ println("it is false");
+}
+
+// output:
+// it is false
+```
+
+### For and for-in
+
+The block will loop while the expression evaluates to true.
+
+```micro
+var i number = 4;
+
+for i > 0 {
+ println(i);
+ i = i - 1;
+}
+
+// output:
+// 4
+// 3
+// 2
+// 1
+```
+
+Loop flow can be altered with:
+
+- `continue` to jump to next iteration.
+- `break` to exit the loop.
+
+The expression is optional (infinite loop).
+
+```micro
+var i number = 4;
+
+for {
+ println(i);
+ i = i - 1;
+ if i == 0 {
+ break;
+ }
+}
+```
+
+`for ... in` can be used to Iterate over arrays.
+
+```micro
+var arr [3]string = ["one", "two", "three"];
+for i in arr {
+ // i is of type string
+ println(i);
+}
+// output:
+// one
+// two
+// three
+```
+
+## Functions
+
+Functions can be defined using `def` and they are assigned a "func" type describing parameters and return type.
+
+Recursion is supported and recursive tail calls will be optimized.
+
+```micro
+// recursive factorial
+// (the tail call will be optimized)
+def fact(n number, acc number) number {
+ if n == 1 {
+ return acc;
+ } else {
+ return fact(n - 1, acc * n);
+ }
+}
+// type of fact is: func (number, number) number
+
+println(fact(20, 1));
+// output:
+// 2432902008176640000
+
+def fn(a number, b number) {
+ // implicit return with no value
+ // it is an error if a value is expected
+}
+```
+
+Functions are higher-order.
+
+```micro
+// b is a function that takes one number and returns a number
+def fn(a number, b func (number) number) number {
+ return b(a);
+}
+```
+
+Closures are supported.
+
+```micro
+def makeCounter() func (number) number {
+ // defaults to 0
+ var c number;
+ def count() number {
+ c = c + 1;
+ return c;
+ }
+ return count;
+}
+
+var c func = makeCounter();
+
+println(c()); // 1
+println(c()); // 2
+println(c()); // 3
+```
+
+Arrays and structures are always passed as reference.
+
+```micro
+var arr [5]number;
+
+def fn(a [5]number) {
+ a[0] = 10;
+}
+fn(arr);
+
+arr[0]; // 10
+```
+
+Local arrays and "struct" can't be returned from a function.
+
+```micro
+def fn() [5]number {
+ var local [5]number;
+ return local;
+}
+// error: returning a local value of type array [5]number
+```
+
+But references to variables in scope are allowed.
+
+```micro
+var g [5]number;
+def fn() [5]number {
+ // local is a reference to a non-local variable
+ var local [5]number = g;
+ return local;
+}
+// error: returning a local value of type array [5]number
+```
+
+### Returning error
+
+For cases when the return value can't be used to inform of an error, a function can tag a return value as "error" using `!?`. That tag can be tested with the `?` unary operator.
+
+Even if a return value is tagged, it must be a valid return value for the function definition.
+
+```micro
+def safeDiv(a number, b number) number {
+ // a / 0 would result on a runtime error!
+ if b == 0 {
+ return !? 0;
+ }
+ return a / b;
+}
+
+var result number;
+
+// checks if the error tag is set while keeping the return value
+if ? (result = safeDiv(1, 0)) {
+ println("safeDiv returned error");
+}
+
+// it can also be used with functions returning no values
+def fn() {
+ return !?;
+}
+
+if ? fn() {
+ println("fn always fails!");
+}
+```
+
+`?` (test error) operator resets the error flag before evaluating the right expression.
+
+```micro
+// our previous "always fails" function
+fn();
+// error is not set
+if ? true {
+ println("Not run");
+}
+```
+
+## Arrays
+
+Arrays are zero based and are supported for all types.
+
+Array size is a numeric literal or a constant expression (must be known at compilation type).
+
+Arrays can be initialised to literals with:
+
+* `[` and `]` providing a list of values.
+* A literal string using double quotes for arrays of unit8; the array size must be at least the length of the string plus 1 (a zero terminating the string).
+
+```micro
+// array of 5 numbers; the size is only needed
+// when no initializing values are provided
+var arr [5]number;
+
+// initialized to specific values
+var arr2 [5]bool = [true, false, true, false, true];
+
+arr[0]; // 0
+arr2[0]; // true
+
+// iterate over members
+for i in arr {
+ println(i);
+}
+
+len(arr); // 5
+
+var i number;
+// arrays indexes are zero based
+for i < len(arr) {
+ println(arr[i]);
+ i = i + 1;
+}
+
+```
+
+Arrays can be constants.
+
+```micro
+const arr [10]number;
+
+arr[0] = 10;
+// error: can't modify a constant variable
+
+// but can change references
+const ref [10]number = arr;
+const arr2 [10]number;
+
+ref = arr2;
+```
+
+Arrays are passed by reference to functions.
+
+## Structures
+
+Structures allow grouping data and functions.
+
+In a structure definition only variable declarations, and definitions of functions and other structures are allowed.
+
+```micro
+// declare struct A
+def A {
+ var n number;
+
+ def fn() number {
+ // n is in scope
+ n = n + 1;
+ return n;
+ }
+}
+
+// memory is allocated for obj
+var obj A;
+
+// obj2 is a reference to obj
+var obj2 A = obj;
+
+prinln(obj2.n);
+// output: 0
+
+println(obj.fn());
+// output: 1
+
+prinln(obj2.n);
+// output: 1
+
+def B { var n bool; }
+
+var b B = obj;
+// error: type mismatch: expected struct B found struct A
+
+// recursive structures are not supported (micro doesn't have pointers)
+def B {
+ var next B;
+ // error: recursive definition of struct B
+}
+
+def C {
+ println("error");
+ // error: expected definition in a struct block
+}
+```
+
+## Directives
+
+TODO
+
+Include a file in current source and it will interpreted / compiled.
+
+```micro
+include "file.micro"
+```
+
+Include a binary file as an expression list to initialize an array.
+
+```micro
+var arr [64]number = incbin "file.micro";
+```
+
+## Interpreter built-in functions
+
+These are available on the interpreter.
+
+| Function | Description | Example |
+| --- | --- | --- |
+| `len` | returns the size in elements of a variable of type array | `len(arr);` |
+| `panic` | ends execution with a message and a runtime error | `panic("message");` |
+| `println` | writes to standard output a variable list of expressions, followed by an end of line; returns number of bytes written | `println("value: ", a);` |
+| `str` | used with arrays of uint8 to convert them as a printable string | `println(str(arr));` |
+
+
+## To do
+
+See [TODO](todo.md) for features that are planned but not yet implemented.
+
diff --git a/errors/errors.go b/errors/errors.go
new file mode 100644
index 0000000..a8d1eb0
--- /dev/null
+++ b/errors/errors.go
@@ -0,0 +1,106 @@
+package errors
+
+import (
+ "fmt"
+ "strings"
+
+ "usebox.net/lang/tokens"
+)
+
+type ErrCode int
+
+const (
+ Unimplemented ErrCode = iota
+ SyntaxError
+ IncompleteEscape
+ InvalidChar
+ ExpectedIdent
+ ExpectedType
+ AlreadyDeclared
+ Undefined
+ ExpectedSemicolon
+ ExpectedComma
+ ExpectedExpr
+ ExpectedLParen
+ ExpectedRParen
+ ExpectedLBrace
+ ExpectedRBrace
+ ExpectedIn
+ ExpectedRBracket
+ ExpectedAssign
+ ExpectedDefinition
+ AssignConst
+ FailedNumeric
+ InvalidOperation
+ InvalidValue
+ InvalidLength
+ InvalidIndex
+ InvalidType
+ TypeMismatch
+ InvalidTarget
+ UndefinedIdent
+ NotCallable
+ NotIndexable
+ NotStruct
+ InvalidNumberArgs
+ CallError
+ NoReturn
+ TailCallError
+ RecursiveStruct
+ Unexpected
+)
+
+type Error struct {
+ Code ErrCode
+ Message string
+ Err error
+}
+
+func (e Error) Error() string {
+ return e.Message
+}
+
+func (e Error) Unwrap() error {
+ return e.Err
+}
+
+func formatError(loc tokens.Location, msg ...string) string {
+ return fmt.Sprintf("%s:%d col %d error: %s", loc.File, loc.Line, loc.Column, strings.Join(msg, " "))
+}
+
+func NewError(code ErrCode, loc tokens.Location, msg ...string) error {
+ return Error{
+ Code: code,
+ Message: formatError(loc, msg...),
+ }
+}
+
+func NewErrorWrap(code ErrCode, loc tokens.Location, err error, msg ...string) error {
+ return Error{
+ Code: code,
+ Message: formatError(loc, msg...),
+ Err: err,
+ }
+}
+
+type ErrorList struct {
+ Errors []error
+}
+
+func (el ErrorList) Error() string {
+ var b strings.Builder
+ var nerrors = len(el.Errors)
+
+ for i := 0; i < nerrors; i++ {
+ fmt.Fprint(&b, el.Errors[i])
+ if i == 10 {
+ fmt.Fprintf(&b, "\n...")
+ break
+ }
+ if i < nerrors-1 {
+ fmt.Fprintln(&b)
+ }
+ }
+
+ return b.String()
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..72a6a14
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module usebox.net/lang
+
+go 1.18
diff --git a/grammar.md b/grammar.md
new file mode 100644
index 0000000..b51adb4
--- /dev/null
+++ b/grammar.md
@@ -0,0 +1,65 @@
+ program = statement* EOF ;
+
+ statement = expr ";"
+ | definition
+ | block
+ | "if" expr block ( "else" block )?
+ | "for" ( expr? | IDENTIFIER "in" expr ) block
+ | "return" "!?"? expr? ";"
+ | "continue" ";"
+ | "break" ";"
+ | declaration ;
+
+ definition = "def" IDENTIFIER struct_block
+ | "def" IDENTIFIER "(" params? ")" TYPE? block
+
+ params = param ( "," param )* ;
+ param = IDENTIFIER TYPE ;
+
+ block = "{" statement* "}" ;
+
+ struct_block = "{" ( definitions | declaration )+ "}" ;
+
+ expr = assignment ;
+
+ declaration = "const" IDENTIFIER TYPE = expr_list ";"
+ | "var" IDENTIFIER TYPE ( "=" expr_list )? ";" ;
+
+ expr_list = expr
+ | "[" expr ( "," expr )? "]"
+
+ assignment = INDENTIFIER ( "[" expr "]" )* "=" expr
+ | logic_or ;
+
+ logic_or = logic_and ( "||" logic_and )* ;
+ logic_and = equality ( "&&" equality )* ;
+ equality = comparison ( ( "!=" | "==" ) comparison )* ;
+ comparison = bitterm ( ( ">" | ">=" | "<" | "<=" ) bitterm )* ;
+ bitterm = term ( ( "<<" | ">>" | "|" | "^" ) term )* ;
+ term = bitfactor ( ( "-" | "+" ) bitfactor )* ;
+ bitfactor = factor ( "&" factor )* ;
+ factor = unary ( ( "/" | "*" | "%" ) unary )* ;
+
+ unary = ( "?" | "!" | "-" | "~" ) unary | call ;
+ call = primary ( "(" arguments? ")" | "." IDENTIFIER )*
+ | primary ( "[" expr "]" )+ ;
+ primary = NUMBER | STRING | FUNC | "true" | "false"
+ | "(" expr ")"
+ | IDENTIFIER ;
+
+ arguments = expr ( "," expr )* ;
+
+ NUMBER = ( "0x" HEX_DIGIT+ | "0b" BIN_DIGIT+ | DIGIT+ )
+ | "'" CHAR "'";
+ STRING = "\"" ( "\\\"" | <not "\""> )* "\"" ;
+ FUNC = IDENTIFIER ;
+ IDENTIFIER = ALPHA ( ALPHA | DIGIT )* ;
+ TYPE = NON_ARRAY_TYPE | ARRAY_TYPE
+ NON_ARRAY_TYPE = "number" | "bool" | "string" | FUNC_TYPE
+ FUNC_TYPE = "func" "(" ( TYPE ( "," TYPE )* )? ")" TYPE?
+ ARRAY_TYPE = "[" expr "]" NON_ARRAY_TYPE
+ ALPHA = "a" ... "z" | "A" ... "Z" | "_" ;
+ ALPHA_DIGIT = "0" ... "9" | "a" ... "f" | "A" ... "F" ;
+ BIN_DIGIT = "0" | "1" ;
+ DIGIT = "0" ... "9" ;
+
diff --git a/interpreter/callable.go b/interpreter/callable.go
new file mode 100644
index 0000000..e9dc6aa
--- /dev/null
+++ b/interpreter/callable.go
@@ -0,0 +1,227 @@
+package interpreter
+
+import (
+ "fmt"
+
+ "usebox.net/lang/ast"
+ "usebox.net/lang/errors"
+ "usebox.net/lang/tokens"
+)
+
+type Callable interface {
+ Name() string
+ String() string
+ Call(interp *Interpreter, args []any, loc tokens.Location) (any, error)
+ Type() *ast.Type
+ Params() []ast.Type
+ Ret() *ast.Type
+}
+
+var builtInFuncs = map[string]Callable{
+ "len": builtInLen{},
+ "println": builtInPrintln{},
+ "panic": builtInPanic{},
+}
+
+func BuiltInTypes() map[string]ast.Type {
+ types := map[string]ast.Type{}
+ for k, v := range builtInFuncs {
+ types[k] = *v.Type()
+ }
+ return types
+}
+
+type builtInLen struct{}
+
+func (n builtInLen) Name() string {
+ return "'len'"
+}
+
+func (n builtInLen) Type() *ast.Type {
+ return ast.NewFuncType(tokens.Token{}, n.Params(), n.Ret())
+}
+
+func (n builtInLen) String() string {
+ return n.Type().String()
+}
+
+func (n builtInLen) Params() []ast.Type {
+ // won't be arity or type checked
+ return []ast.Type{ast.TypeArray}
+}
+
+func (n builtInLen) Ret() *ast.Type {
+ return &ast.TypeNumber
+}
+
+func (n builtInLen) Call(interp *Interpreter, args []any, loc tokens.Location) (any, error) {
+ vals, ok := args[0].([]any)
+ if !ok {
+ // shouldn't happen
+ return nil, errors.NewError(errors.Unexpected, loc, "type mismatch in call to 'len'")
+ }
+ // return
+ panic(&PanicJump{typ: PanicJumpReturn, value: ast.Number(len(vals))})
+}
+
+type builtInPrintln struct{}
+
+func (n builtInPrintln) Name() string {
+ return "'println'"
+}
+
+func (n builtInPrintln) Type() *ast.Type {
+ return ast.NewFuncType(tokens.Token{}, n.Params(), n.Ret())
+}
+
+func (n builtInPrintln) String() string {
+ return n.Type().String()
+}
+
+func (n builtInPrintln) Params() []ast.Type {
+ // won't be arity or type checked
+ return nil
+}
+
+func (n builtInPrintln) Ret() *ast.Type {
+ return &ast.TypeNumber
+}
+
+func (n builtInPrintln) Call(interp *Interpreter, args []any, loc tokens.Location) (any, error) {
+ var count int
+
+ for i := range args {
+ if args[i] == nil {
+ continue
+ }
+ written, err := fmt.Print(args[i])
+ if err != nil {
+ return nil, err
+ }
+ count += written
+ }
+ fmt.Println()
+ count++
+
+ // return
+ panic(&PanicJump{typ: PanicJumpReturn, value: ast.Number(count)})
+}
+
+type builtInPanic struct{}
+
+func (n builtInPanic) Name() string {
+ return "'panic'"
+}
+
+func (n builtInPanic) Type() *ast.Type {
+ return ast.NewFuncType(tokens.Token{}, n.Params(), n.Ret())
+}
+
+func (n builtInPanic) String() string {
+ return n.Type().String()
+}
+
+func (n builtInPanic) Params() []ast.Type {
+ return []ast.Type{ast.TypeString}
+}
+
+func (n builtInPanic) Ret() *ast.Type {
+ // no return (returns none)
+ return nil
+}
+
+func (n builtInPanic) Call(interp *Interpreter, args []any, loc tokens.Location) (any, error) {
+ return nil, fmt.Errorf("[%s] panic: %s", loc, args[0])
+}
+
+type Function struct {
+ fun ast.Func
+ closure *Env
+}
+
+func (f Function) Name() string {
+ return f.fun.Name.String()
+}
+
+func (f Function) Type() *ast.Type {
+ return ast.NewFuncType(f.fun.Name, f.Params(), f.Ret())
+}
+
+func (f Function) String() string {
+ return f.Type().String()
+}
+
+func (f Function) Params() []ast.Type {
+ params := make([]ast.Type, 0, 1)
+ for _, p := range f.fun.Params {
+ params = append(params, p.Type)
+ }
+ return params
+}
+
+func (f Function) Ret() *ast.Type {
+ return f.fun.Ret
+}
+
+func (f Function) Call(interp *Interpreter, args []any, loc tokens.Location) (result any, err error) {
+ pEnv := interp.env
+ interp.env = NewEnv(f.closure)
+ defer func() {
+ interp.env = pEnv
+ }()
+
+ for n, v := range f.fun.Params {
+ interp.env = interp.env.Set(v.Name.Value, Var{Value: args[n], Loc: v.Name.Loc})
+ }
+
+ // tail call optimization
+ for {
+ // wrap the evaluation in a function
+ var tailCall *PanicJump
+ tailCall, result, err = func() (tailCall *PanicJump, result any, err error) {
+ // handle tail call
+ // will call this function again without setting up a new call frame
+ defer func() {
+ if r := recover(); r != nil {
+ if val, ok := r.(*PanicJump); ok && val.typ == PanicJumpTailCall {
+ tailCall = val
+ return
+ }
+ panic(r)
+ }
+ }()
+ result, err = interp.evaluate(f.fun.Body)
+ return nil, result, err
+ }()
+ if tailCall == nil {
+ break
+ }
+
+ // XXX: can we optimize this?
+ // if the callee can be a variable expression, we probably can't
+ call := tailCall.value.(ast.Call)
+ callee, err := interp.evaluate(call.Callee)
+ if err != nil {
+ return nil, err
+ }
+ if fun, ok := callee.(Callable); !ok || fun.Name() != f.Name() {
+ // can't be optimized
+ val, err := interp.evaluate(call)
+ if err != nil {
+ return nil, err
+ }
+ panic(&PanicJump{typ: PanicJumpReturn, value: val, loc: tailCall.loc})
+ }
+
+ args, err := interp.evalArgs(f.Name(), call.Loc, f.Params(), call.Args)
+ if err != nil {
+ return nil, err
+ }
+
+ for n, v := range f.fun.Params {
+ interp.env.Update(v.Name.Value, args[n])
+ }
+ }
+
+ return result, err
+}
diff --git a/interpreter/interpreter.go b/interpreter/interpreter.go
new file mode 100644
index 0000000..8d5b7a8
--- /dev/null
+++ b/interpreter/interpreter.go
@@ -0,0 +1,801 @@
+package interpreter
+
+import (
+ "fmt"
+ "strings"
+
+ "usebox.net/lang/ast"
+ "usebox.net/lang/errors"
+ "usebox.net/lang/tokens"
+)
+
+type Var struct {
+ Value any
+ Loc tokens.Location
+}
+
+type Env struct {
+ env map[string]Var
+ parent *Env
+}
+
+func NewEnv(parent *Env) *Env {
+ return &Env{make(map[string]Var), parent}
+}
+
+func (e *Env) Get(key string, local bool) (Var, bool) {
+ p := e
+ for {
+ if v, ok := p.env[key]; ok {
+ return v, true
+ }
+
+ if !local && p.parent != nil {
+ p = p.parent
+ continue
+ }
+
+ return Var{}, false
+ }
+}
+
+func (e *Env) GetArray(key string, local bool) ([]any, bool) {
+ v, ok := e.Get(key, local)
+ if !ok {
+ return nil, false
+ }
+ arr, ok := v.Value.([]any)
+ return arr, ok
+}
+
+// Set will create a new enviroment if this new value shadows an existing one.
+func (e *Env) Set(key string, value Var) *Env {
+ p := e
+ if _, ok := e.Get(key, false); ok {
+ p = NewEnv(e)
+ }
+ p.env[key] = value
+ return p
+}
+
+func (e *Env) Update(key string, value any) error {
+ p := e
+ for {
+ if v, ok := p.env[key]; ok {
+ v.Value = value
+ p.env[key] = v
+ return nil
+ }
+
+ if p.parent != nil {
+ p = p.parent
+ continue
+ }
+
+ // should not happen
+ return fmt.Errorf("variable not found '%s'", key)
+ }
+}
+
+func (e *Env) string(pad string) string {
+ b := strings.Builder{}
+
+ b.WriteString(fmt.Sprintf("%senv {\n", pad))
+ for k, v := range e.env {
+ b.WriteString(fmt.Sprintf("%s%s = %v\n", pad+" ", k, v))
+ }
+ if e.parent != nil {
+ b.WriteString(e.parent.string(pad + " "))
+ }
+ b.WriteString(fmt.Sprintf("%s}", pad))
+
+ if pad != "" {
+ b.WriteRune('\n')
+ }
+
+ return b.String()
+}
+
+func (e Env) String() string {
+ return e.string("")
+}
+
+func (e *Env) Copy() *Env {
+ ne := NewEnv(e.parent)
+ for k, v := range e.env {
+ val := Var{Value: v.Value, Loc: v.Loc}
+ // FIXME: structs
+ switch v := val.Value.(type) {
+ // update the closures to the new environment
+ case Function:
+ v.closure = ne
+ val.Value = v
+ // copy the array
+ case []any:
+ arr := make([]any, len(v))
+ copy(arr, v)
+ val.Value = arr
+ }
+ ne.env[k] = val
+ }
+
+ return ne
+}
+
+type Struct struct {
+ name string
+ env *Env
+ inst bool
+}
+
+func (s Struct) String() string {
+ if s.inst {
+ return fmt.Sprintf("<struct %s>", s.name)
+ } else {
+ return fmt.Sprintf("struct %s", s.name)
+ }
+}
+
+func (s Struct) Instance() Struct {
+ return Struct{name: s.name, env: s.env.Copy(), inst: true}
+}
+
+type PanicJumpType int
+
+const (
+ PanicJumpReturn PanicJumpType = iota
+ PanicJumpTailCall
+ PanicJumpContinue
+ PanicJumpBreak
+)
+
+type PanicJump struct {
+ typ PanicJumpType
+ value any
+ loc tokens.Location
+}
+
+type Interpreter struct {
+ env *Env
+ // location of the last node evaluated
+ last tokens.Location
+ fError bool
+}
+
+func NewInterpreter() Interpreter {
+ global := NewEnv(nil)
+ for k, v := range builtInFuncs {
+ // not shadowing any value
+ global.Set(k, Var{Value: v})
+ }
+ return Interpreter{env: global}
+}
+
+func (i *Interpreter) Interp(expr any) (val any, err error) {
+ val, err = i.evaluate(expr)
+ if err != nil {
+ return nil, err
+ }
+ return val, nil
+}
+
+func (i *Interpreter) isTrue(expr any) bool {
+ switch v := expr.(type) {
+ case ast.Number:
+ return v != 0
+ case string:
+ return v != ""
+ case bool:
+ return v
+ case Function, Callable:
+ return true
+ default:
+ return false
+ }
+}
+
+func (i *Interpreter) unaryNumeric(op tokens.Token, right any) (ast.Number, error) {
+ if vr, ok := right.(ast.Number); ok {
+ return vr, nil
+ }
+
+ // shouldn't happen
+ return 0, errors.NewError(errors.Unexpected, op.Loc, "invalid operand")
+}
+
+func (i *Interpreter) binaryNumeric(left any, op tokens.Token, right any) (ast.Number, ast.Number, error) {
+ vl, okl := left.(ast.Number)
+ vr, okr := right.(ast.Number)
+ if okl && okr {
+ return vl, vr, nil
+ }
+
+ // shouldn't happen
+ return 0, 0, errors.NewError(errors.Unexpected, op.Loc, "invalid operands")
+}
+
+func (i *Interpreter) evaluate(expr any) (any, error) {
+ switch v := expr.(type) {
+ case ast.Var:
+ i.last = v.Name.Loc
+ return i.declare(v)
+ case ast.Variable:
+ i.last = v.Name.Loc
+ return i.variable(v)
+ case ast.Assign:
+ return i.assignment(v)
+ case ast.Binary:
+ i.last = v.Op.Loc
+ return i.binaryExpr(v)
+ case ast.Unary:
+ i.last = v.Op.Loc
+ return i.unaryExpr(v)
+ case ast.Literal:
+ i.last = v.Value.Loc
+ return i.literalExpr(v)
+ case ast.Group:
+ return i.groupExpr(v)
+ case ast.Block:
+ return i.block(v)
+ case ast.IfElse:
+ return i.ifElse(v)
+ case ast.For:
+ return i.forStmt(v)
+ case ast.ForIn:
+ return i.forInStmt(v)
+ case ast.Call:
+ i.last = v.Loc
+ return i.call(v)
+ case ast.Func:
+ i.last = v.Name.Loc
+ return i.fun(v)
+ case ast.Return:
+ return i.returnStmt(v)
+ case ast.Continue:
+ return i.continueStmt(v)
+ case ast.Break:
+ return i.breakStmt(v)
+ case ast.ExprList:
+ return i.exprList(v)
+ case ast.Struct:
+ return i.strct(v)
+ case ast.GetExpr:
+ return i.getExpr(v)
+ default:
+ // XXX
+ return nil, errors.Error{
+ Code: errors.Unimplemented,
+ Message: fmt.Sprintf("evaluation unimplemented: %s", v),
+ }
+ }
+}
+
+func (i *Interpreter) defVal(typ ast.Type) any {
+ switch typ.Value.Id {
+ case tokens.TNumber:
+ return ast.Number(0)
+ case tokens.TBool:
+ return false
+ case tokens.TString:
+ return ""
+ case tokens.TArray:
+ arr := make([]any, *typ.Len)
+ for n := range arr {
+ arr[n] = i.defVal(*typ.Ret)
+ }
+ return arr
+ case tokens.TStruct:
+ val, _ := i.env.Get(typ.Value.Value, false)
+ strct := val.Value.(Struct)
+ return strct.Instance()
+ default:
+ return nil
+ }
+}
+
+func (i *Interpreter) declare(v ast.Var) (any, error) {
+ var initv any
+ var err error
+
+ if v.Type.Value.Id == tokens.TArray && *v.Type.Len <= 0 {
+ return nil, errors.NewError(errors.InvalidLength, v.Name.Loc, "invalid array length")
+ }
+
+ if v.Initv != nil {
+ initv, err = i.evaluate(v.Initv)
+ if err != nil {
+ return 0, err
+ }
+
+ i.env = i.env.Set(v.Name.Value, Var{initv, v.Name.Loc})
+ return initv, nil
+ } else {
+ // defaults
+ initv = i.defVal(v.Type)
+ value := Var{initv, v.Name.Loc}
+ i.env = i.env.Set(v.Name.Value, value)
+ return initv, nil
+ }
+}
+
+func (i *Interpreter) variable(v ast.Variable) (any, error) {
+ if val, ok := i.env.Get(v.Name.Value, false); ok {
+ value := val.Value
+ if v.Index != nil {
+ index, err := i.evaluate(v.Index)
+ if err != nil {
+ return nil, err
+ }
+ idx, ok := index.(ast.Number)
+ if !ok || idx < 0 || int(idx) >= *v.Type.Len {
+ return nil, errors.NewError(errors.InvalidIndex, v.Name.Loc, "invalid index for", v.Type.String())
+ }
+ arr, ok := i.env.GetArray(v.Name.Value, false)
+ if !ok {
+ return nil, errors.NewError(errors.Unexpected, v.Name.Loc, "expected array")
+ }
+ value = arr[idx]
+ }
+ return value, nil
+ }
+ // shouldn't happen
+ return nil, errors.NewError(errors.Unexpected, v.Name.Loc, "undefined variable", v.String())
+}
+
+func (i *Interpreter) assignment(v ast.Assign) (any, error) {
+ env := i.env
+ vLeft := v.Left
+
+ if getExpr, ok := v.Left.(ast.GetExpr); ok {
+ obj, err := i.evaluate(getExpr.Object)
+ if err != nil {
+ return nil, err
+ }
+ strct := obj.(Struct)
+ env = strct.env
+ vLeft = getExpr.Expr
+ }
+
+ left, ok := vLeft.(ast.Variable)
+ if !ok {
+ return nil, errors.NewError(errors.Unexpected, v.Loc, "expected variable in assignation")
+ }
+ right, err := i.evaluate(v.Right)
+ if err != nil {
+ return nil, err
+ }
+
+ if left.Type.Value.Id == tokens.TArray && v.Right.Resolve().Value.Id != tokens.TArray {
+ if left.Index == nil {
+ return nil, errors.NewError(errors.Unexpected, v.Loc, "expected index in assignation")
+ }
+ index, err := i.evaluate(left.Index)
+ if err != nil {
+ return nil, err
+ }
+ idx, ok := index.(ast.Number)
+ if !ok || idx < 0 || int(idx) >= *left.Type.Len {
+ return nil, errors.NewError(errors.InvalidIndex, v.Loc, "invalid index for", left.Type.String())
+ }
+ arr, ok := i.env.GetArray(left.Name.Value, false)
+ if !ok {
+ return nil, errors.NewError(errors.Unexpected, v.Loc, "expected array")
+ }
+ arr[idx] = right
+ right = arr
+ }
+
+ err = env.Update(left.Name.Value, right)
+ if err != nil {
+ return nil, errors.NewError(errors.InvalidValue, left.Name.Loc, err.Error())
+ }
+ return right, nil
+}
+
+func (i *Interpreter) binaryExpr(expr ast.Binary) (any, error) {
+ // first lazy operators
+ if expr.Op.Id == tokens.And || expr.Op.Id == tokens.Or {
+ left, err := i.evaluate(expr.Left)
+ if err != nil {
+ return nil, err
+ }
+ if expr.Op.Id == tokens.And {
+ if !i.isTrue(left) {
+ return false, nil
+ }
+ } else {
+ if i.isTrue(left) {
+ return true, nil
+ }
+ }
+ right, err := i.evaluate(expr.Right)
+ if err != nil {
+ return nil, err
+ }
+ return i.isTrue(right), nil
+ }
+
+ left, err := i.evaluate(expr.Left)
+ if err != nil {
+ return nil, err
+ }
+ right, err := i.evaluate(expr.Right)
+ if err != nil {
+ return nil, err
+ }
+
+ // equality compares two values as long as they are the same type
+ switch expr.Op.Id {
+ case tokens.Ne:
+ return right != left, nil
+ case tokens.Eq:
+ return right == left, nil
+ }
+
+ vl, vr, err := i.binaryNumeric(left, expr.Op, right)
+ if err != nil {
+ return 0, err
+ }
+
+ switch expr.Op.Id {
+ case tokens.Sub:
+ return vl - vr, nil
+ case tokens.Add:
+ return vl + vr, nil
+ case tokens.Mul:
+ return vl * vr, nil
+ case tokens.Div:
+ if vr == 0 {
+ return nil, errors.NewError(errors.InvalidOperation, expr.Op.Loc, "invalid operation: division by zero")
+ }
+ return vl / vr, nil
+ case tokens.Mod:
+ if vr == 0 {
+ return nil, errors.NewError(errors.InvalidOperation, expr.Op.Loc, "invalid operation: division by zero")
+ }
+ return vl % vr, nil
+ case tokens.BitAnd:
+ return vl & vr, nil
+ case tokens.BitShl:
+ return vl << vr, nil
+ case tokens.BitShr:
+ return vl >> vr, nil
+ case tokens.BitOr:
+ return vl | vr, nil
+ case tokens.BitXor:
+ return vl ^ vr, nil
+ case tokens.Lt:
+ return vl < vr, nil
+ case tokens.Le:
+ return vl <= vr, nil
+ case tokens.Gt:
+ return vl > vr, nil
+ case tokens.Ge:
+ return vl >= vr, nil
+ default:
+ return nil, errors.NewError(errors.Unimplemented, expr.Op.Loc, "unimplemented operator", expr.Op.Value)
+ }
+
+}
+
+func (i *Interpreter) unaryExpr(expr ast.Unary) (any, error) {
+ if expr.Op.Id == tokens.TestE {
+ i.fError = false
+ }
+
+ right, err := i.evaluate(expr.Right)
+ if err != nil {
+ return nil, err
+ }
+
+ switch expr.Op.Id {
+ case tokens.TestE:
+ return i.fError, nil
+ case tokens.Sub:
+ vr, err := i.unaryNumeric(expr.Op, right)
+ if err != nil {
+ return nil, err
+ }
+ return -vr, nil
+ case tokens.Neg:
+ vr, err := i.unaryNumeric(expr.Op, right)
+ if err != nil {
+ return nil, err
+ }
+ return ^vr, nil
+ case tokens.Not:
+ vr := i.isTrue(right)
+ return !vr, nil
+ default:
+ return nil, errors.NewError(errors.Unimplemented, expr.Op.Loc, "unimplemented operator", expr.Op.Value)
+ }
+}
+
+func (i *Interpreter) literalExpr(expr ast.Literal) (any, error) {
+ switch expr.Value.Id {
+ case tokens.Number:
+ return expr.Numval, nil
+ case tokens.String:
+ return expr.Value.Value, nil
+ case tokens.True:
+ return true, nil
+ case tokens.False:
+ return false, nil
+ case tokens.None:
+ return nil, nil
+ default:
+ return nil, errors.NewError(errors.Unimplemented, expr.Value.Loc, "unimplemented type", expr.Value.String())
+ }
+}
+
+func (i *Interpreter) groupExpr(expr ast.Group) (any, error) {
+ return i.evaluate(expr.Expr)
+}
+
+func (i *Interpreter) blockNoEnv(block ast.Block) (any, error) {
+ var v any
+ var err error
+ for _, expr := range block.Stmts {
+ v, err = i.evaluate(expr)
+ if err != nil {
+ return nil, err
+ }
+ }
+ return v, nil
+}
+
+func (i *Interpreter) block(block ast.Block) (any, error) {
+ pEnv := i.env
+ i.env = NewEnv(pEnv)
+ defer func() {
+ i.env = pEnv
+ }()
+
+ return i.blockNoEnv(block)
+}
+
+func (i *Interpreter) ifElse(ifElse ast.IfElse) (any, error) {
+ cond, err := i.evaluate(ifElse.Cond)
+ if err != nil {
+ return nil, err
+ }
+ if i.isTrue(cond) {
+ return i.evaluate(ifElse.True)
+ } else {
+ return i.evaluate(ifElse.False)
+ }
+}
+
+func (i *Interpreter) forEval(fcond func() (bool, error), stmts ast.Expr) (any, error) {
+ var last any
+
+ for {
+ end, result, err := func() (end bool, result any, err error) {
+ // handle "interruptions"
+ defer func() {
+ if r := recover(); r != nil {
+ if val, ok := r.(*PanicJump); ok {
+ if val.typ == PanicJumpContinue {
+ end = false
+ return
+ }
+ if val.typ == PanicJumpBreak {
+ end = true
+ result = last
+ err = nil
+ return
+ }
+ }
+ panic(r)
+ }
+ }()
+ for {
+ if fcond != nil {
+ cond, err := fcond()
+ if err != nil {
+ return true, nil, err
+ }
+ if !cond {
+ return true, last, nil
+ }
+ }
+
+ last, err = i.evaluate(stmts)
+ if err != nil {
+ return true, nil, err
+ }
+ }
+ }()
+ if end {
+ return result, err
+ }
+ }
+}
+
+func (i *Interpreter) forStmt(forStmt ast.For) (any, error) {
+ if forStmt.Cond == nil {
+ return i.forEval(nil, forStmt.Stmts)
+ } else {
+ return i.forEval(func() (bool, error) {
+ cond, err := i.evaluate(forStmt.Cond)
+ if err != nil {
+ return false, err
+ }
+ return i.isTrue(cond), nil
+ }, forStmt.Stmts)
+ }
+}
+
+func (i *Interpreter) forInStmt(forInStmt ast.ForIn) (any, error) {
+ expr, err := i.evaluate(forInStmt.Expr)
+ if err != nil {
+ return nil, err
+ }
+
+ index := 0
+ arr := expr.([]any)
+
+ pEnv := i.env
+ i.env = NewEnv(pEnv)
+ defer func() {
+ i.env = pEnv
+ }()
+ i.env = i.env.Set(forInStmt.Name.Value, Var{Loc: forInStmt.Name.Loc})
+
+ return i.forEval(func() (bool, error) {
+ if index == len(arr) {
+ return false, nil
+ }
+ i.env.Update(forInStmt.Name.Value, arr[index])
+ index++
+ return true, nil
+ }, forInStmt.Stmts)
+}
+
+func (i *Interpreter) evalArgs(name string, loc tokens.Location, params []ast.Type, args []ast.Expr) ([]any, error) {
+ vals := make([]any, 0, 16)
+ for _, a := range args {
+ arg, err := i.evaluate(a)
+ if err != nil {
+ return nil, err
+ }
+ vals = append(vals, arg)
+ }
+
+ return vals, nil
+}
+
+func (i *Interpreter) call(call ast.Call) (result any, err error) {
+ callee, err := i.evaluate(call.Callee)
+ if err != nil {
+ return nil, err
+ }
+
+ fun, ok := callee.(Callable)
+ if !ok {
+ return nil, errors.NewError(errors.NotCallable, call.Loc, "value is not callable")
+ }
+
+ args, err := i.evalArgs(fun.Name(), call.Loc, fun.Params(), call.Args)
+ if err != nil {
+ return nil, err
+ }
+
+ // handle return via panic call
+ defer func() {
+ if r := recover(); r != nil {
+ if val, ok := r.(*PanicJump); ok && val.typ == PanicJumpReturn {
+ result = val.value
+ err = nil
+ } else {
+ // won't be handled here
+ panic(r)
+ }
+ }
+ }()
+
+ _, err = fun.Call(i, args, call.Loc)
+ if err != nil {
+ e := errors.NewError(errors.CallError, call.Loc, "error calling", fun.Name()).(errors.Error)
+ e.Err = err
+ return nil, e
+ }
+
+ // clear return if there's no return value
+ if fun.Ret() == nil {
+ return nil, nil
+ }
+
+ return nil, errors.NewError(errors.NoReturn, i.last, "no return value in", fun.Name(), "expected", fun.Ret().String())
+}
+
+func (i *Interpreter) fun(v ast.Func) (any, error) {
+ callable := Function{fun: v, closure: i.env}
+ i.env = i.env.Set(v.Name.Value, Var{callable, v.Name.Loc})
+ return nil, nil
+}
+
+func (i *Interpreter) strct(v ast.Struct) (any, error) {
+ pEnv := i.env
+ sEnv := NewEnv(pEnv)
+ i.env = sEnv
+ defer func() {
+ i.env = pEnv
+ strct := Struct{name: v.Name.Value, env: sEnv}
+ i.env = i.env.Set(v.Name.Value, Var{Value: strct, Loc: v.Name.Loc})
+ }()
+
+ _, err := i.blockNoEnv(v.Body)
+ if err != nil {
+ return nil, err
+ }
+
+ return nil, nil
+}
+
+func (i *Interpreter) getExpr(v ast.GetExpr) (any, error) {
+ obj, err := i.evaluate(v.Object)
+ if err != nil {
+ return nil, err
+ }
+
+ if val, ok := obj.(Struct); ok {
+ name := v.Expr.(ast.Variable)
+ if name.Index == nil {
+ expr, _ := val.env.Get(name.Name.Value, true)
+ return expr.Value, nil
+ }
+
+ arr, _ := val.env.GetArray(name.Name.Value, true)
+ index, err := i.evaluate(name.Index)
+ if err != nil {
+ return nil, err
+ }
+ return arr[index.(ast.Number)], nil
+ }
+
+ // shouldn't happen
+ return nil, errors.NewError(errors.Unimplemented, v.Resolve().Value.Loc, "unimplemented get-expr")
+}
+
+func (i *Interpreter) returnStmt(v ast.Return) (any, error) {
+ var val any
+
+ if v.Value != nil {
+ // could be a tail call we could optimize
+ if call, ok := v.Value.(ast.Call); ok {
+ i.fError = v.Error
+ panic(&PanicJump{typ: PanicJumpTailCall, value: call, loc: v.Loc})
+ }
+
+ var err error
+ val, err = i.evaluate(v.Value)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ i.fError = v.Error
+ panic(&PanicJump{typ: PanicJumpReturn, value: val, loc: v.Loc})
+}
+
+func (i *Interpreter) continueStmt(v ast.Continue) (any, error) {
+ panic(&PanicJump{typ: PanicJumpContinue, loc: v.Loc})
+}
+
+func (i *Interpreter) breakStmt(v ast.Break) (any, error) {
+ panic(&PanicJump{typ: PanicJumpBreak, loc: v.Loc})
+}
+
+func (i *Interpreter) exprList(v ast.ExprList) (any, error) {
+ // XXX: should we set last?
+ vals := make([]any, len(v.Exprs))
+ for n, expr := range v.Exprs {
+ val, err := i.evaluate(expr)
+ if err != nil {
+ return nil, err
+ }
+ vals[n] = val
+ }
+ return vals, nil
+}
diff --git a/interpreter/interpreter_test.go b/interpreter/interpreter_test.go
new file mode 100644
index 0000000..d9c63ae
--- /dev/null
+++ b/interpreter/interpreter_test.go
@@ -0,0 +1,754 @@
+package interpreter
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "usebox.net/lang/ast"
+ "usebox.net/lang/errors"
+ "usebox.net/lang/parser"
+ "usebox.net/lang/tokenizer"
+)
+
+func run(input string) (any, error) {
+ tr := tokenizer.NewTokenizer("-", strings.NewReader(input))
+ toks, err := tr.Scan()
+ if err != nil {
+ return nil, err
+ }
+ p := parser.NewParser(BuiltInTypes())
+ tree, err := p.Parse(toks)
+ if err != nil {
+ errs := err.(errors.ErrorList)
+ return nil, errs.Errors[0]
+ }
+
+ i := NewInterpreter()
+ var val any
+ for _, expr := range tree {
+ if val, err = i.Interp(expr); err != nil {
+ return nil, err
+ }
+ }
+ return val, nil
+}
+
+func expectNumber(t *testing.T, input string, n ast.Number) {
+ val, err := run(input)
+ if err != nil {
+ t.Errorf("Unexpected error %s (input: %s)", err, input)
+ }
+ i, ok := val.(ast.Number)
+ if !ok {
+ t.Errorf("Number expected, got %T instead (input: %s)", val, input)
+ } else if i != n {
+ t.Errorf("%d expected, got %d instead (input: %s)", n, i, input)
+ }
+}
+
+func expectBool(t *testing.T, input string, b bool) {
+ val, err := run(input)
+ if err != nil {
+ t.Errorf("Unexpected error %s (input: %s)", err, input)
+ }
+ i, ok := val.(bool)
+ if !ok {
+ t.Errorf("bool expected, got %T instead (input: %s)", val, input)
+ } else if i != b {
+ t.Errorf("%v expected, got %v instead (input: %s)", b, i, input)
+ }
+}
+
+func expectString(t *testing.T, input string, s string) {
+ val, err := run(input)
+ if err != nil {
+ t.Errorf("unexpected error %s (input: %s)", err, input)
+ }
+ i, ok := val.(string)
+ if !ok {
+ t.Errorf("string expected, got %T instead (input: %s)", val, input)
+ } else if i != s {
+ t.Errorf("%s expected, got %s instead (input: %s)", s, i, input)
+ }
+}
+
+func expectError(t *testing.T, input string, code errors.ErrCode) {
+ _, err := run(input)
+ if err == nil {
+ t.Errorf("expected error and didn't happen (input: %s)", input)
+ }
+ if e, ok := err.(errors.Error); !ok {
+ t.Errorf("error not an Error (input: %s): %s", input, err)
+ } else {
+ if e.Code != code {
+ t.Errorf("error %d expected, got %d -> %s (input: %s)", code, e.Code, e, input)
+ }
+ }
+}
+
+func TestNumExpr(t *testing.T) {
+ expectNumber(t, "1;", 1)
+ expectNumber(t, "123;", 123)
+ expectNumber(t, "0x123;", 0x123)
+ expectNumber(t, "0b101;", 0b101)
+ expectNumber(t, "1 + 1;", 2)
+ expectNumber(t, "2 + 3 * 4;", 14)
+ expectNumber(t, "(5 - (3 - 1)) + -1;", 2)
+}
+
+func TestInvalidOp(t *testing.T) {
+ // these operators are only valid for numbers
+
+ expectError(t, "true + \"a\";", errors.InvalidOperation)
+ expectError(t, "\"a\" + false;", errors.InvalidOperation)
+ expectError(t, "\"a\" + \"a\";", errors.InvalidOperation)
+ expectError(t, "-false;", errors.InvalidOperation)
+ expectError(t, "-true;", errors.InvalidOperation)
+ expectError(t, "~false;", errors.InvalidOperation)
+ expectError(t, "~true;", errors.InvalidOperation)
+ expectError(t, "-\"a\";", errors.InvalidOperation)
+ expectError(t, "~\"a\";", errors.InvalidOperation)
+
+ for _, op := range []string{
+ "-", "+", "*", "/", "%", "&", "<<", ">>", "|", "^", "<", ">", "<=", ">=",
+ } {
+ expectError(t, fmt.Sprintf("1 %s true;", op), errors.InvalidOperation)
+ expectError(t, fmt.Sprintf("true %s 1;", op), errors.InvalidOperation)
+ expectError(t, fmt.Sprintf("1 %s \"a\";", op), errors.InvalidOperation)
+ expectError(t, fmt.Sprintf("\"a\" %s 1;", op), errors.InvalidOperation)
+ }
+
+ // these are invalid with numbers too
+ expectError(t, "1 / 0;", errors.InvalidOperation)
+ expectError(t, "1 % 0;", errors.InvalidOperation)
+}
+
+func TestAssign(t *testing.T) {
+ expectNumber(t, "var a number; a = 1;", 1)
+ expectString(t, "var a string = \"v\"; var b string = a; b;", "v")
+ expectBool(t, "var a number = 1; var b number = 2; var c number = 3; a = b = c; a == b && a== 3;", true)
+ expectBool(t, "var a number; var b number = a = 1; a == b && a == 1;", true)
+
+ // local
+ expectNumber(t, `{
+ var a number = 1;
+ if a != 1 {
+ panic("1 expected");
+ }
+ a = 2;
+ if a != 2 {
+ panic("2 expected");
+ }
+ println(a = 3);
+ a;
+ }`, 3)
+
+ // scopes and shadowing
+ expectNumber(t, `
+ var a number = 1;
+ if a != 1 {
+ panic("1 expected");
+ }
+ {
+ var a number = 2;
+ if a != 2 {
+ panic("2 expected");
+ }
+ }
+ a;`, 1)
+
+ expectError(t, "a = 1;", errors.UndefinedIdent)
+
+ // syntax error; perhaps should be in the parser
+ if _, err := run("var a number; (a) = 1;"); err == nil {
+ t.Error("error expected by didn't happen")
+ }
+ if _, err := run("var a number; a + a = 1;"); err == nil {
+ t.Error("error expected by didn't happen")
+ }
+}
+
+func TestPrintln(t *testing.T) {
+ expectNumber(t, "println();", 1)
+ expectNumber(t, "println(1);", 2)
+ expectNumber(t, "println(true);", 5)
+ expectNumber(t, "println(\"hello\");", 6)
+ expectNumber(t, "println(1, \"one\");", 5)
+}
+
+func TestEquality(t *testing.T) {
+ expectBool(t, "true == true;", true)
+ expectBool(t, "true == false;", false)
+ expectBool(t, "false == true;", false)
+ expectBool(t, "false == false;", true)
+
+ expectBool(t, "1 == 1;", true)
+ expectBool(t, "0 != 1;", true)
+ expectBool(t, "\"a\" == \"a\";", true)
+ expectBool(t, "\"a\" != \"b\";", true)
+
+ expectBool(t, "true != true;", false)
+ expectBool(t, "true != false;", true)
+ expectBool(t, "false != true;", true)
+ expectBool(t, "false != false;", false)
+
+ expectError(t, "true == 1;", errors.TypeMismatch)
+ expectError(t, "true == \"true\";", errors.TypeMismatch)
+ expectError(t, "false == 0;", errors.TypeMismatch)
+ expectError(t, "false == \"\";", errors.TypeMismatch)
+ expectError(t, "false == \"false\";", errors.TypeMismatch)
+}
+
+func TestNot(t *testing.T) {
+ expectBool(t, "!true;", false)
+ expectBool(t, "!false;", true)
+
+ expectBool(t, "!0;", true)
+ expectBool(t, "!1;", false)
+ expectBool(t, "!\"\";", true)
+ expectBool(t, "!\"a\";", false)
+
+ expectBool(t, "!!true;", true)
+}
+
+func TestLogical(t *testing.T) {
+ expectBool(t, "true && true;", true)
+ expectBool(t, "true && false;", false)
+ expectBool(t, "false && true;", false)
+
+ expectBool(t, "true || true;", true)
+ expectBool(t, "true || false;", true)
+ expectBool(t, "false || true;", true)
+
+ expectBool(t, "1 == 1 && 1 != 2;", true)
+ expectBool(t, "1 != 1 || 1 != 2;", true)
+
+ expectBool(t, "1 & 1 && 0 | 1;", true)
+ expectBool(t, "1 & 1 && 2 & 1;", false)
+ expectBool(t, "1 & 1 || 0 | 1;", true)
+ expectBool(t, "2 & 1 || 1 & 2;", false)
+}
+
+func TestIfElse(t *testing.T) {
+ expectBool(t, "if 1 == 1 { true; }", true)
+
+ val, err := run("if 1 != 1 { true; }")
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if val != nil {
+ t.Errorf("none expected, %T found", val)
+ }
+
+ expectBool(t, "if 1 != 1 { false; } else { true; }", true)
+ expectBool(t, "if 1 == 1 { false; } else { true; }", false)
+
+ expectBool(t, "if 1 != 1 { false; } else { if 1 == 1 { true; }}", true)
+
+ val, err = run("if 1 != 1 { true; } else { if 1 != 1 { false; }}")
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if val != nil {
+ t.Errorf("none expected, %T found", val)
+ }
+}
+
+func TestPanic(t *testing.T) {
+ _, err := run("panic(\"error\");")
+ if err == nil {
+ t.Error("error expected and didn't happen")
+ }
+}
+
+func TestFunc(t *testing.T) {
+ expectString(t, `
+ def wrap() func () string {
+ def inFn() string {
+ return "output";
+ }
+ return inFn;
+ }
+ var fn func () string = wrap();
+ fn();
+ `, "output")
+
+ expectNumber(t, `
+ def makeCounter() func () number {
+ var i number;
+ def count() number {
+ i = i + 1;
+ return i;
+ }
+ return count;
+ }
+ makeCounter()();
+ `, 1)
+
+ expectBool(t, `
+ // why not?
+ def loop(from number, to number, fn func (number)) {
+ if from > to {
+ return;
+ }
+ fn(from);
+ // return is needed to trigger the tail-call optimization
+ return loop(from + 1, to, fn);
+ }
+
+ def wilsonPrime(n number) bool {
+ var acc number = 1;
+ def fact(i number) {
+ acc = (acc * i) % n;
+ }
+ loop(2, n - 1, fact);
+ return acc == n - 1;
+ }
+
+ wilsonPrime(1789);
+ `, true)
+
+ expectNumber(t, `
+ var a number = 1;
+ {
+ var c number;
+ def add() {
+ c = c + a;
+ println(c, " ", a);
+ }
+ add(); // c is 1
+ var a string;
+ add(); // c is 2
+ println(c);
+ c;
+ }
+ `, 2)
+
+ expectBool(t, `
+ var a bool;
+ def fn() func () {
+ def inFn() {
+ println("works");
+ a = true;
+ }
+ return inFn;
+ }
+ fn()();
+ a;`, true)
+
+ expectBool(t, `
+ var a bool;
+ def fn(a bool) bool {
+ return a;
+ }
+ fn(true);`, true)
+
+ expectString(t, `
+ def fn() { }
+ {
+ def fn() { }
+ }
+ "OK";`, "OK")
+
+}
+
+func TestFuncError(t *testing.T) {
+ expectString(t, `
+ var fn func () bool;
+ "OK";`, "OK")
+
+ expectError(t, `
+ var fn func () bool;
+ fn();`, errors.NotCallable)
+
+ expectError(t, `
+ def a() bool { return true; }
+ var b func () number = a;
+ `, errors.TypeMismatch)
+
+ expectError(t, `
+ // return with no function
+ return false;
+ `, errors.InvalidOperation)
+
+ expectError(t, `
+ def fn() bool { return 0; }
+ fn();
+ `, errors.TypeMismatch)
+
+ expectError(t, `
+ def fn() bool { }
+ fn();
+ `, errors.NoReturn)
+
+ expectError(t, "true();", errors.NotCallable)
+ expectError(t, "\"bad\"();", errors.NotCallable)
+ expectError(t, "10();", errors.NotCallable)
+ expectError(t, "true(1);", errors.NotCallable)
+ expectError(t, "\"bad\"(1);", errors.NotCallable)
+ expectError(t, "10(1);", errors.NotCallable)
+
+ expectError(t, `
+ var fn func() number = println;
+ fn("hello");`, errors.CallError)
+}
+
+func TestFor(t *testing.T) {
+ expectString(t, `
+ for false {
+ panic("oops");
+ }
+ "OK";
+ `, "OK")
+
+ // the value emited by the loops is the last
+ // evaluated value inside the loop
+ expectNumber(t, `
+ var i number;
+ for i < 10 {
+ i = i + 1;
+ }
+ `, 10)
+
+ expectNumber(t, `
+ def mkGen() func () number {
+ var n number;
+ def gen() number {
+ n = n + 1;
+ return n;
+ }
+ return gen;
+ }
+ var c func() number = mkGen();
+ var acc number;
+ for c() < 10 {
+ acc = acc + 1;
+ }
+ acc;
+ `, 9)
+
+ expectNumber(t, `
+ var acc number;
+ var j number;
+ for j < 10 {
+ var i number;
+ for i < 10 {
+ i = i + 1;
+ acc = acc + 1;
+ }
+ j = j + 1;
+ }
+ acc;
+ `, 100)
+
+ expectError(t, `
+ var a number;
+ for {
+ a = 1 / a;
+ }
+ `, errors.InvalidOperation)
+}
+
+func TestForIn(t *testing.T) {
+ // the value emited by the loops is the last
+ // evaluated value inside the loop
+ expectNumber(t, `
+ var arr [3]number = [1, 2, 10];
+ for i in arr {
+ i;
+ }
+ `, 10)
+
+ // shadow the array variable
+ expectNumber(t, `
+ var a [3]number = [1, 2, 10];
+ for a in a {
+ println(a);
+ a;
+ }
+ `, 10)
+
+ expectNumber(t, `
+ var acc number;
+ var j [10]number;
+ for i in j {
+ for i in j {
+ acc = acc + 1;
+ }
+ }
+ acc;
+ `, 100)
+}
+
+func TestContinue(t *testing.T) {
+ expectString(t, `
+ var a number;
+ for a < 10 {
+ a = a + 1;
+ if a & 1 {
+ continue;
+ panic("oops");
+ }
+ println(a);
+ }
+ "OK";
+ `, "OK")
+
+ expectString(t, `
+ var arr [5]number = [1, 2, 3, 4, 5];
+ for a in arr {
+ if a & 1 {
+ continue;
+ panic("oops");
+ }
+ println(a);
+ }
+ "OK";
+ `, "OK")
+
+ expectNumber(t, `
+ var acc number;
+ var a number;
+ for a < 10 {
+ var b number;
+ for b < 10 {
+ b = b + 1;
+ if b & 1 {
+ continue;
+ panic("oops");
+ }
+ acc = acc + b;
+ }
+ a = a + 1;
+ }
+ acc;
+ `, (2+4+6+8+10)*10)
+}
+
+func TestBreak(t *testing.T) {
+ expectString(t, `
+ for {
+ break;
+ panic("oops");
+ }
+ "OK";
+ `, "OK")
+
+ expectString(t, `
+ var a [10] number;
+ for i in a {
+ break;
+ panic("oops");
+ }
+ "OK";
+ `, "OK")
+
+ expectNumber(t, `
+ var a number;
+ for {
+ if a == 20 {
+ break;
+ }
+ a = a + 1;
+ for true {
+ a = a + 1;
+ break;
+ a = a + 1;
+ }
+ }
+ a;
+ `, 20)
+}
+
+func TestArray(t *testing.T) {
+ expectError(t, "var a [0]number;", errors.InvalidLength)
+ expectError(t, "var a [1]number; a[-1];", errors.InvalidIndex)
+ expectError(t, "var a [1]number; a[2];", errors.InvalidIndex)
+ expectError(t, "var a [1]number; a[-1] = 1;", errors.InvalidIndex)
+ expectError(t, "var a [1]number; a[2] = 1;", errors.InvalidIndex)
+ expectError(t, "var a [5][5]number;", errors.InvalidType)
+ expectError(t, "var a number; len(a);", errors.TypeMismatch)
+
+ expectNumber(t, `
+ var arr [5]number = [1, 2, 3, 4, 5];
+
+ var i number = 1;
+ for i < len(arr) {
+ arr[0] = arr[0] + arr[i];
+ i = i + 1;
+ }
+ arr[0];
+ `, 1+2+3+4+5)
+
+ expectString(t, `
+ var arr [3]string = ["one", "two", "three"];
+ var arr2 [3]string;
+
+ var i number;
+ for i < len(arr) {
+ arr2[i] = arr[i];
+ i = i + 1;
+ }
+ i = 0;
+ for i < len(arr) {
+ if arr2[i] != arr[i] {
+ panic("failed");
+ }
+ i = i + 1;
+ }
+ "OK";
+ `, "OK")
+
+ expectBool(t, `
+ var a [5]bool;
+ def fn(v [5]bool) {
+ v[2] = true;
+ }
+ fn(a);
+ println(a);
+ a[2];
+ `, true)
+
+ expectNumber(t, `
+ const a [3]number = [1, 2, 3];
+ const b [3]number = [3, 2, 1];
+ const c [3]number = a;
+ c = b;
+ c[0];
+ `, 3)
+
+ expectNumber(t, `
+ var acc number;
+
+ def a() {
+ println("this is a");
+ acc = acc + 10;
+ }
+
+ def b() {
+ println("this is b");
+ acc = acc + 10;
+ }
+
+ var arr [2]func () = [a, b];
+
+ for i in arr {
+ i();
+ }
+ acc;`, 20)
+}
+
+func TestErrorTag(t *testing.T) {
+ expectString(t, `
+ def fn() { return !?; }
+ if ? fn() {
+ "KO";
+ }
+ `, "KO")
+ expectString(t, `
+ def fn() { return; }
+ if ? fn() {
+ "KO";
+ } else {
+ "OK";
+ }
+ `, "OK")
+ expectString(t, `
+ def fn() number { return !? 0; }
+ if ? fn() {
+ "KO";
+ }
+ `, "KO")
+ expectString(t, `
+ def fn() number { return 0; }
+ if !(? fn()) {
+ "OK";
+ }
+ `, "OK")
+ expectNumber(t, `
+ def fn() number { return !? 99; }
+ var n number;
+ if ? (n = fn()) {
+ n;
+ }
+ `, 99)
+ expectString(t, `
+ def fn() { return !?; }
+ fn();
+ // ? resets the error flag before evaluating the right side
+ if ? true {
+ "KO";
+ } else {
+ "OK";
+ }
+ `, "OK")
+}
+
+func TestStruct(t *testing.T) {
+ expectString(t, `
+ def A { var p string = "OK"; }
+ var a A;
+ a.p;
+ `, "OK")
+ expectNumber(t, `
+ def A { var p number = 10; }
+ var a A;
+ a.p;
+ `, 10)
+ expectNumber(t, `
+ def A { var p number; }
+ var a A;
+ a.p = 10;
+ a.p;
+ `, 10)
+ expectNumber(t, `
+ def A { var p number = 10; }
+ var a A;
+ a.p = 100;
+ var b A;
+ b.p;
+ `, 10)
+ expectNumber(t, `
+ def A { var p number = 10; }
+ var a [3]A;
+ a[0].p + a[1].p + a[2].p;
+ `, 30)
+ expectNumber(t, `
+ def A {
+ var arr [3]number = [1, 2, 3];
+
+ def p(index number) number {
+ return arr[index];
+ }
+ }
+ var a A;
+ a.p(0) + a.p(1) + a.p(2);
+ `, 6)
+ expectNumber(t, `
+ def A {
+ var p number;
+ }
+ def B {
+ var arr [3]A;
+ var i number;
+
+ def new() A {
+ var r A = arr[i];
+ r.p = i + 1;
+ i = (i + 1) % 3;
+ return r;
+ }
+ }
+ var b B;
+ b.new().p + b.new().p + b.new().p;
+ // new should have returned a reference
+ b.arr[0].p + b.arr[1].p + b.arr[2].p;
+ `, 6)
+
+ expectError(t, `
+ def A {
+ var a A;
+ }
+ `, errors.RecursiveStruct)
+}
diff --git a/micro-vim/ftdetect/micro.vim b/micro-vim/ftdetect/micro.vim
new file mode 100644
index 0000000..cd69d70
--- /dev/null
+++ b/micro-vim/ftdetect/micro.vim
@@ -0,0 +1,3 @@
+autocmd BufNewFile,BufRead *.micro set filetype=micro
+autocmd BufNewFile,BufRead *.cro set filetype=micro
+
diff --git a/micro-vim/ftplugin/micro.vim b/micro-vim/ftplugin/micro.vim
new file mode 100644
index 0000000..ea3ce5a
--- /dev/null
+++ b/micro-vim/ftplugin/micro.vim
@@ -0,0 +1,26 @@
+
+if exists("b:did_ftplugin")
+ finish
+endif
+let b:did_ftplugin = 1
+
+let s:save_cpo = &cpo
+set cpo-=C
+
+setlocal suffixesadd+=.micro,.cro
+setlocal comments=b://,fb:-
+setlocal commentstring=//\ %s
+
+setlocal smartindent cindent
+
+setlocal tabstop=4 shiftwidth=4 softtabstop=4 expandtab
+
+" run make editing a micro file and it will be parsed for errors
+setlocal makeprg=micro\ -parse\ %
+setlocal errorformat=%f:%l\ col\ %c\ %trror:\ %m
+
+let b:undo_ftplugin="setl com<"
+
+let &cpo = s:save_cpo
+unlet s:save_cpo
+
diff --git a/micro-vim/syntax/micro.vim b/micro-vim/syntax/micro.vim
new file mode 100644
index 0000000..f620579
--- /dev/null
+++ b/micro-vim/syntax/micro.vim
@@ -0,0 +1,90 @@
+if exists("b:current_syntax")
+ finish
+endif
+
+let s:cpo_save = &cpo
+set cpo&vim
+
+syn clear
+syn sync fromstart
+
+syn match microNumber /\<[0-9]\d*\>/
+
+syn match microEscapeHex /\\x\x{2}/ transparent contained
+syn match microEscapeChar /\\[nt\\']/ transparent contained
+syn match microEscapeString /\\[nt\\"]/ transparent contained
+
+syn cluster microCharacterGroup contains=microEscapeChar,microEscapeHex
+syn region microCharacter start=/'/ skip=/\\\\\|\\'/ end=/'/ contains=@microCharacterGroup
+
+syn cluster microStringGroup contains=microEscapeString,microEscapeHex
+syn region microString start=/"/ skip=/\\\\\|\\"/ end=/"/ contains=@Spell,microStringGroup
+
+syn keyword microConditional if else continue break
+syn keyword microRepeat for in
+syn keyword microType number bool string func
+syn keyword microBoolean true false
+syn keyword microKeyword len
+syn keyword microStatement return nextgroup=microTagE skipwhite skipempty
+
+syn match microTagE /!?/ display contained
+
+"" this is probably not correct
+syn match microOperator display "?\|+\|-\|/\|*\|=\|=\|\^\|&\||\|!\|!=\|>\|>=\|<\|<=\|\~\|%"
+syn match microOperator display "&&\|||"
+
+syn keyword microKeyword def nextgroup=microFuncName skipwhite skipempty
+syn match microFuncName /[_a-zA-Z][_a-zA-Z0-9]*/ nextgroup=microFuncParams display contained
+syn region microFuncParams start=/(/ end=/)/ contains=microVarIdent,microNumber,microChar nextgroup=microVarType,microVarArray skipwhite contained
+
+syn match mucroPropStart /\./ nextgroup=microProp skipwhite
+syn match microProp /[_a-zA-Z][_a-zA-Z0-9]*/ display contained
+
+syn keyword microKeyword var nextgroup=microVarIdent skipwhite skipempty
+syn keyword microKeyword const nextgroup=microVarIdent skipwhite skipempty
+syn match microVarIdent /[_a-zA-Z][_a-zA-Z0-9]*/ nextgroup=microVarType,microVarArray skipwhite
+syn match microVarArray /\[[0-9]*\]/ nextgroup=microVarType transparent contained skipwhite
+syn match microVarType /[_a-zA-Z][_a-zA-Z0-9]*/ display contained
+
+syn match microCall /[_a-zA-Z][_a-zA-Z0-9]*(/he=e-1,me=e-1
+
+syn region microIndex start=/\[/ end=/\]/ contains=microNumber,microChar,microIndexIdent,microIndexCall
+syn match microIndexIdent /[_a-zA-Z][_a-zA-Z0-9]*/ display contained
+syn match microIndexCall /[_a-zA-Z][_a-zA-Z0-9]*(/he=e-1,me=e-1 display contained
+
+syn keyword microBuiltin println panic
+
+syn region microBlock start="{" end="}" transparent fold
+
+syn keyword microCommentTodo TODO FIXME XXX BUG contained
+syn match microLineComment "\/\/.*" contains=@Spell,microCommentTodo
+
+hi def link microLineComment Comment
+hi def link microCommentTodo Todo
+hi def link microNumber Number
+hi def link microCharacter Character
+hi def link microString String
+hi def link microStrEmbeddedQuote String
+hi def link microConditional Conditional
+hi def link microRepeat Repeat
+hi def link microOperator Operator
+hi def link microType Type
+hi def link microVarType Type
+hi def link microBoolean Boolean
+hi def link microKeyword Keyword
+hi def link microStatement Statement
+hi def link microTagE Keyword
+
+hi def link microBuiltin Function
+hi def link microFuncName Function
+hi def link microCall Function
+hi def link microProp Identifier
+
+hi def link microIndexIdent Number
+hi def link microIndexCall Function
+
+let b:current_syntax = "micro"
+
+let &cpo = s:cpo_save
+unlet s:cpo_save
+
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..65007b7
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,1168 @@
+package parser
+
+import (
+ "fmt"
+ "strconv"
+
+ "usebox.net/lang/ast"
+ "usebox.net/lang/errors"
+ "usebox.net/lang/tokens"
+)
+
+// XXX: duplicated
+const runtime = "<rt>"
+
+type Env struct {
+ env map[string]ast.Type
+ parent *Env
+}
+
+func NewEnv(parent *Env) *Env {
+ return &Env{make(map[string]ast.Type), parent}
+}
+
+func (e *Env) Get(key string, local bool) (ast.Type, bool) {
+ if v, ok := e.env[key]; ok {
+ return v, true
+ }
+ if !local && e.parent != nil {
+ return e.parent.Get(key, local)
+ }
+ return ast.TypeNone, false
+}
+
+func (e *Env) Set(key string, typ ast.Type) *Env {
+ n := e
+ if _, ok := e.Get(key, false); ok {
+ n = NewEnv(e)
+ }
+ n.env[key] = typ
+ return n
+}
+
+type Parser struct {
+ tokens []tokens.Token
+ pos int
+ env *Env
+ structs map[string]*Env
+ currFuncRet *ast.Type
+ currLoop bool
+}
+
+func NewParser(builtIns map[string]ast.Type) Parser {
+ global := NewEnv(nil)
+ for k, v := range builtIns {
+ // not shadowing any value
+ global.Set(k, v)
+ }
+
+ return Parser{env: global, structs: make(map[string]*Env)}
+}
+
+func (p *Parser) Parse(tokens []tokens.Token) ([]ast.Expr, error) {
+ var errs []error
+ var exprs []ast.Expr
+
+ p.tokens = tokens
+ p.pos = 0
+
+ for !p.end() {
+ expr, err := p.statement()
+ if err != nil {
+ errs = append(errs, err)
+ p.recover()
+ continue
+ }
+ exprs = append(exprs, expr)
+ }
+
+ if len(errs) > 0 {
+ return nil, errors.ErrorList{Errors: errs}
+ }
+
+ return exprs, nil
+}
+
+func (p *Parser) recover() {
+ p.next()
+
+ for !p.end() {
+ if p.prev().Id == tokens.Semicolon {
+ return
+ }
+ if p.match([]tokens.TokenId{tokens.Def, tokens.Var, tokens.Const, tokens.If, tokens.Return, tokens.For}) {
+ p.back()
+ return
+ }
+
+ p.next()
+ }
+}
+
+func (p *Parser) next() tokens.Token {
+ if !p.end() {
+ p.pos++
+ return p.prev()
+ }
+ return p.curr()
+}
+
+func (p *Parser) back() {
+ if p.pos != 0 {
+ p.pos--
+ }
+}
+
+func (p *Parser) prev() tokens.Token {
+ return p.tokens[p.pos-1]
+}
+
+func (p *Parser) curr() tokens.Token {
+ return p.tokens[p.pos]
+}
+
+func (p *Parser) end() bool {
+ return p.curr().Id == tokens.Eof
+}
+
+func (p *Parser) check(id tokens.TokenId) bool {
+ if p.end() {
+ return false
+ }
+ return p.curr().Id == id
+}
+
+func (p *Parser) match(ids []tokens.TokenId) bool {
+ for _, t := range ids {
+ if p.check(t) {
+ p.next()
+ return true
+ }
+ }
+ return false
+}
+
+func (p *Parser) statement() (ast.Expr, error) {
+ if p.match([]tokens.TokenId{tokens.Var, tokens.Const}) {
+ return p.declaration()
+ }
+
+ if p.check(tokens.Break) {
+ p.next()
+ return p.breakStmt()
+ }
+
+ if p.check(tokens.Continue) {
+ p.next()
+ return p.continueStmt()
+ }
+
+ if p.check(tokens.Return) {
+ p.next()
+ return p.returnStmt()
+ }
+
+ if p.check(tokens.For) {
+ p.next()
+ return p.forStmt()
+ }
+
+ if p.check(tokens.If) {
+ return p.ifElse()
+ }
+
+ if p.check(tokens.LBrace) {
+ return p.block()
+ }
+
+ if p.check(tokens.Def) {
+ p.next()
+ return p.definition()
+ }
+
+ expr, err := p.expr()
+ if err != nil {
+ return nil, err
+ }
+ if p.next().Id != tokens.Semicolon {
+ return nil, errors.NewError(errors.ExpectedSemicolon, p.prev().Loc, "';' expected after expression")
+ }
+ return expr, nil
+}
+
+func (p *Parser) expr() (ast.Expr, error) {
+ return p.assign()
+}
+
+func (p *Parser) typ() (*ast.Type, error) {
+ if p.check(tokens.LBracket) {
+ tok := p.next()
+
+ size, err := p.primary()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.next().Id != tokens.RBracket {
+ return nil, errors.NewError(errors.ExpectedRBracket, p.prev().Loc, "']' expected in array definition")
+ }
+
+ typ, err := p.typ()
+ if err != nil {
+ return nil, err
+ }
+
+ if typ.Value.Id == tokens.TArray {
+ return nil, errors.NewError(errors.InvalidType, p.prev().Loc, "invalid array type", typ.String())
+ }
+
+ if size.Resolve().Compare(&ast.TypeNumber) {
+ if v, ok := size.(ast.Literal); ok {
+ tok.Id = tokens.TArray
+ intval := int(v.Numval)
+ return &ast.Type{Value: tok, Ret: typ, Len: &intval}, nil
+ }
+ }
+
+ return nil, errors.NewError(errors.InvalidValue, p.prev().Loc, "invalid value for array size")
+ }
+
+ if !p.match([]tokens.TokenId{tokens.TNumber, tokens.TBool, tokens.TString, tokens.TFunc, tokens.Ident}) {
+ return nil, errors.NewError(errors.ExpectedType, p.curr().Loc, "expected type")
+ }
+ typ := p.prev()
+
+ if typ.Id == tokens.Ident {
+ typ, ok := p.env.Get(typ.Value, false)
+ if !ok || typ.Value.Id != tokens.TStruct {
+ return nil, errors.NewError(errors.ExpectedType, p.curr().Loc, "expected type")
+ }
+ if typ.Inst {
+ return nil, errors.NewError(errors.ExpectedType, p.curr().Loc, "expected type found instance of", typ.String())
+ }
+ // this is an instance of the struct
+ typ.Inst = true
+ return &typ, nil
+ }
+
+ if typ.Id != tokens.TFunc {
+ return &ast.Type{Value: typ}, nil
+ }
+
+ if p.next().Id != tokens.LParen {
+ return nil, errors.NewError(errors.ExpectedLParen, p.prev().Loc, "'(' expected in function type")
+ }
+
+ params := make([]ast.Type, 0, 16)
+ for !p.check(tokens.RParen) {
+ typ, err := p.typ()
+ if err != nil {
+ return nil, err
+ }
+
+ params = append(params, *typ)
+ if !p.check(tokens.Comma) {
+ break
+ }
+ p.next()
+ }
+ p.next()
+
+ var ret *ast.Type
+ if !p.check(tokens.Semicolon) && !p.check(tokens.Assign) && !p.check(tokens.RParen) && !p.check(tokens.LBrace) {
+ if typ, err := p.typ(); err != nil {
+ return nil, err
+ } else {
+ ret = typ
+ }
+ }
+
+ return &ast.Type{Value: typ, Params: params, Ret: ret}, nil
+}
+
+func (p *Parser) definition() (ast.Expr, error) {
+ name := p.next()
+ if name.Id != tokens.Ident {
+ return nil, errors.NewError(errors.ExpectedIdent, name.Loc, "expected identifier")
+ }
+
+ if p.check(tokens.LBrace) {
+ return p.strctDecl(name)
+ }
+
+ return p.funcDecl(name)
+}
+
+func (p *Parser) strctDecl(name tokens.Token) (ast.Expr, error) {
+ if v, ok := p.env.Get(name.Value, true); ok {
+ loc := v.Value.Loc.String()
+ if v.Value.Loc.File == "" {
+ loc = runtime
+ }
+ return nil, errors.NewError(errors.AlreadyDeclared, name.Loc, "struct", name.String(), "already declared in", loc)
+ }
+
+ p.env = p.env.Set(name.Value, *ast.NewStructType(name))
+ pEnv := p.env
+ sEnv := NewEnv(pEnv)
+ p.env = sEnv
+
+ expr, err := p.strctBlock()
+ if err != nil {
+ return nil, err
+ }
+
+ if len(sEnv.env) == 0 {
+ return nil, errors.NewError(errors.InvalidValue, name.Loc, "empty struct")
+ }
+
+ p.structs[name.Value] = sEnv
+ p.env = pEnv
+
+ return ast.Struct{Name: name, Body: expr.(ast.Block)}, nil
+}
+
+func (p *Parser) funcDecl(name tokens.Token) (ast.Expr, error) {
+ if p.next().Id != tokens.LParen {
+ return nil, errors.NewError(errors.ExpectedLParen, p.prev().Loc, "'(' expected in function declaration")
+ }
+
+ params := make([]ast.Var, 0, 16)
+ paramTypes := make([]ast.Type, 0, 16)
+ seen := map[string]tokens.Location{}
+ for !p.check(tokens.RParen) {
+ pname := p.next()
+ if pname.Id != tokens.Ident {
+ return nil, errors.NewError(errors.ExpectedIdent, pname.Loc, "expected parameter name")
+ }
+
+ typ, err := p.typ()
+ if err != nil {
+ return nil, err
+ }
+
+ if prev, ok := seen[pname.Value]; ok {
+ return nil, errors.NewError(errors.AlreadyDeclared, pname.Loc, "parameter", pname.String(), "already declared in", prev.String())
+ }
+ seen[pname.Value] = pname.Loc
+
+ params = append(params, ast.Var{Name: pname, Type: *typ})
+ paramTypes = append(paramTypes, *typ)
+ if !p.check(tokens.Comma) {
+ break
+ }
+ p.next()
+ }
+ p.next()
+
+ var ret *ast.Type
+ if !p.check(tokens.LBrace) {
+ if typ, err := p.typ(); err != nil {
+ return nil, err
+ } else {
+ ret = typ
+ }
+ }
+
+ if v, ok := p.env.Get(name.Value, true); ok {
+ loc := v.Value.Loc.String()
+ if v.Value.Loc.File == "" {
+ loc = runtime
+ }
+ return nil, errors.NewError(errors.AlreadyDeclared, name.Loc, "function", name.String(), "already declared in", loc)
+ }
+ p.env = p.env.Set(name.Value, *ast.NewFuncType(name, paramTypes, ret))
+
+ oldFuncRet := p.currFuncRet
+ if ret != nil {
+ p.currFuncRet = ret
+ } else {
+ p.currFuncRet = &ast.TypeNone
+ }
+ defer func() {
+ p.currFuncRet = oldFuncRet
+ }()
+
+ for _, v := range params {
+ p.env = p.env.Set(v.Name.Value, v.Type)
+ }
+
+ expr, err := p.block()
+ if err != nil {
+ return nil, err
+ }
+
+ // return checks?
+ return ast.Func{Name: name, Params: params, Ret: ret, Body: expr.(ast.Block)}, nil
+}
+
+func (p *Parser) exprList() (ast.Expr, error) {
+ if p.check(tokens.LBracket) {
+ p.next()
+ vals := make([]ast.Expr, 0, 16)
+ if !p.check(tokens.RBracket) {
+ var prev ast.Expr
+ for {
+ val, err := p.expr()
+ if err != nil {
+ return nil, err
+ }
+ if prev != nil && !prev.Resolve().Compare(val.Resolve()) {
+ return nil, errors.NewError(errors.TypeMismatch, p.prev().Loc, "type mismatch in expression list: found", val.Resolve().String(), "expected", prev.Resolve().String())
+ }
+ prev = val
+
+ vals = append(vals, val)
+ if !p.check(tokens.Comma) {
+ break
+ }
+ p.next()
+ }
+ if !p.check(tokens.RBracket) {
+ return nil, errors.NewError(errors.ExpectedRBracket, p.prev().Loc, "']' expected to end expression list")
+ }
+ }
+ p.next()
+ if len(vals) == 0 {
+ return nil, errors.NewError(errors.InvalidValue, p.prev().Loc, "empty expression list")
+ }
+
+ return ast.ExprList{Exprs: vals}, nil
+ }
+
+ return p.expr()
+}
+
+func (p *Parser) decl(con bool) (ast.Expr, error) {
+ name := p.next()
+ if name.Id != tokens.Ident {
+ return nil, errors.NewError(errors.ExpectedIdent, name.Loc, "expected identifier")
+ }
+
+ typ, err := p.typ()
+ if err != nil {
+ return nil, err
+ }
+
+ if typ.Value.Id == tokens.TStruct {
+ if _, ok := p.structs[typ.Value.Value]; !ok {
+ return nil, errors.NewError(errors.RecursiveStruct, p.curr().Loc, "recursive definition of", typ.String())
+ }
+ }
+
+ var initv ast.Expr
+ if p.check(tokens.Assign) {
+ p.next()
+ var err error
+ initv, err = p.exprList()
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ if p.next().Id != tokens.Semicolon {
+ return nil, errors.NewError(errors.ExpectedSemicolon, p.prev().Loc, "';' expected after expression")
+ }
+
+ if v, ok := p.env.Get(name.Value, true); ok {
+ loc := v.Value.Loc.String()
+ if v.Value.Loc.File == "" {
+ loc = runtime
+ }
+ return nil, errors.NewError(errors.AlreadyDeclared, name.Loc, name.String(), "already declared in", loc)
+ }
+
+ if initv != nil {
+ if v, ok := initv.(ast.ExprList); ok {
+ if typ.Value.Id != tokens.TArray {
+ return nil, errors.NewError(errors.InvalidValue, name.Loc, "expression list to initialize a type", typ.String())
+ }
+ if *typ.Len != len(v.Exprs) {
+ return nil, errors.NewError(errors.InvalidValue, name.Loc, "invalid number of elements in expression list to initialize a type", typ.String())
+ }
+ if !typ.Ret.Compare(initv.Resolve()) {
+ return nil, errors.NewError(errors.TypeMismatch, name.Loc, "type mismatch expected list of", typ.Ret.String(), "found", initv.Resolve().String())
+ }
+ } else if !typ.Compare(initv.Resolve()) {
+ return nil, errors.NewError(errors.TypeMismatch, name.Loc, "type mismatch expected", typ.String(), "found", initv.Resolve().String())
+ }
+ if initv.Resolve().Value.Id == tokens.TArray || initv.Resolve().Value.Id == tokens.TStruct {
+ if !con && initv.Resolve().ResolveConst() {
+ return nil, errors.NewError(errors.TypeMismatch, name.Loc, "variable cannot be used as referece to constant")
+ }
+ typ.Ref = true
+ }
+ }
+ if typ.Value.Id == tokens.TStruct {
+ // it is an instance
+ p.structs[name.Value] = p.structs[typ.Value.Value]
+ typ.Inst = true
+ }
+ if con {
+ if initv == nil {
+ return nil, errors.NewError(errors.ExpectedAssign, name.Loc, "expected assignation in constant declaration")
+ }
+ if typ.Value.Id == tokens.TArray {
+ // allow changing reference but not the values of the array
+ typ.Ret.Const = true
+ } else {
+ typ.Const = true
+ }
+ }
+
+ p.env = p.env.Set(name.Value, *typ)
+ return ast.Var{Name: name, Type: *typ, Initv: initv}, nil
+}
+
+func (p *Parser) declaration() (ast.Expr, error) {
+ if p.prev().Id == tokens.Const {
+ return p.decl(true)
+ }
+ return p.decl(false)
+}
+
+func (p *Parser) continueStmt() (ast.Expr, error) {
+ if p.next().Id != tokens.Semicolon {
+ return nil, errors.NewError(errors.ExpectedSemicolon, p.prev().Loc, "';' expected after expression")
+ }
+
+ if !p.currLoop {
+ return nil, errors.NewError(errors.InvalidOperation, p.curr().Loc, "continue without loop")
+ }
+
+ return ast.Continue{Loc: p.curr().Loc}, nil
+}
+
+func (p *Parser) breakStmt() (ast.Expr, error) {
+ if p.next().Id != tokens.Semicolon {
+ return nil, errors.NewError(errors.ExpectedSemicolon, p.prev().Loc, "';' expected after expression")
+ }
+
+ if !p.currLoop {
+ return nil, errors.NewError(errors.InvalidOperation, p.curr().Loc, "break without loop")
+ }
+
+ return ast.Break{Loc: p.curr().Loc}, nil
+}
+
+func (p *Parser) returnStmt() (ast.Expr, error) {
+ if p.currFuncRet == nil {
+ return nil, errors.NewError(errors.InvalidOperation, p.prev().Loc, "return without function call")
+ }
+
+ var isError bool
+ if p.check(tokens.TagE) {
+ p.next()
+ isError = true
+ }
+
+ if p.check(tokens.Semicolon) {
+ p.next()
+ if p.currFuncRet.Value.Id != tokens.None {
+ return nil, errors.NewError(errors.TypeMismatch, p.prev().Loc, "type mismatch in return value, expected", p.currFuncRet.String())
+ }
+ return ast.Return{Loc: p.curr().Loc, Error: isError}, nil
+ }
+
+ expr, err := p.expr()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.next().Id != tokens.Semicolon {
+ return nil, errors.NewError(errors.ExpectedSemicolon, p.prev().Loc, "';' expected after expression")
+ }
+
+ exprRet := expr.Resolve()
+ if exprRet == nil {
+ exprRet = &ast.TypeNone
+ } else {
+ if exprRet.Value.Id == tokens.TArray || exprRet.Value.Id == tokens.TStruct {
+ if v, ok := expr.(ast.Variable); ok {
+ if val, ok := p.env.Get(v.Name.Value, true); ok && !val.Ref {
+ return nil, errors.NewError(errors.InvalidOperation, p.prev().Loc, "returning a local value of type", exprRet.String())
+ }
+ }
+ }
+ }
+
+ if !p.currFuncRet.Compare(exprRet) {
+ return nil, errors.NewError(errors.TypeMismatch, p.prev().Loc, "type mismatch in return value, expected", p.currFuncRet.String(), "found", exprRet.String())
+ }
+
+ return ast.Return{Value: expr, Loc: p.prev().Loc, Error: isError}, nil
+}
+
+func (p *Parser) forBody() (ast.Expr, error) {
+ oldCurrLoop := p.currLoop
+ p.currLoop = true
+ defer func() {
+ p.currLoop = oldCurrLoop
+ }()
+
+ return p.block()
+}
+
+func (p *Parser) forStmt() (ast.Expr, error) {
+ var expr ast.Expr
+ var err error
+
+ if !p.check(tokens.LBrace) {
+ if p.check(tokens.Ident) {
+ ident := p.next()
+ if p.check(tokens.In) {
+ p.next()
+ expr, err = p.expr()
+ if err != nil {
+ return nil, err
+ }
+ if expr.Resolve().Value.Id != tokens.TArray {
+ return nil, errors.NewError(errors.TypeMismatch, p.next().Loc, "expected array in for-in statement found", expr.Resolve().String())
+ }
+
+ pEnv := p.env
+ p.env = NewEnv(pEnv)
+ defer func() {
+ p.env = pEnv
+ }()
+ p.env = p.env.Set(ident.Value, *expr.Resolve().Ret)
+
+ stmts, err := p.forBody()
+ if err != nil {
+ return nil, err
+ }
+ return ast.ForIn{Name: ident, Expr: expr, Stmts: stmts}, nil
+ }
+ p.back()
+ }
+
+ expr, err = p.expr()
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ stmts, err := p.forBody()
+ if err != nil {
+ return nil, err
+ }
+ return ast.For{Cond: expr, Stmts: stmts}, nil
+}
+
+func (p *Parser) ifElse() (ast.Expr, error) {
+ ifTok := p.next()
+ expr, err := p.expr()
+ if err != nil {
+ return nil, err
+ }
+ ifBlock, err := p.block()
+ if err != nil {
+ return nil, err
+ }
+
+ var elseBlock ast.Expr
+ if p.check(tokens.Else) {
+ p.next()
+ elseBlock, err = p.block()
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ elseBlock = ast.Literal{Value: tokens.Token{Id: tokens.None, Loc: ifTok.Loc}}
+ }
+ return ast.IfElse{Cond: expr, True: ifBlock, False: elseBlock}, nil
+}
+
+func (p *Parser) strctBlock() (ast.Expr, error) {
+ definitions := make([]ast.Expr, 0, 256)
+
+ if p.next().Id != tokens.LBrace {
+ return nil, errors.NewError(errors.ExpectedLBrace, p.prev().Loc, "'{' expected to open block")
+ }
+
+ for !p.check(tokens.RBrace) && !p.end() {
+ if p.check(tokens.Def) {
+ p.next()
+ decl, err := p.definition()
+ if err != nil {
+ return nil, err
+ }
+ definitions = append(definitions, decl)
+ continue
+ }
+ if p.match([]tokens.TokenId{tokens.Var, tokens.Const}) {
+ decl, err := p.declaration()
+ if err != nil {
+ return nil, err
+ }
+ definitions = append(definitions, decl)
+ continue
+ }
+ return nil, errors.NewError(errors.ExpectedDefinition, p.next().Loc, "expected definition in a struct block")
+ }
+
+ next := p.next()
+ if next.Id != tokens.RBrace {
+ // MUST be EOF
+ return nil, errors.NewError(errors.ExpectedRBrace, next.Loc, "'}' expected to close block, end of file found")
+ }
+
+ return ast.Block{Stmts: definitions}, nil
+}
+
+func (p *Parser) block() (ast.Expr, error) {
+ pEnv := p.env
+ p.env = NewEnv(pEnv)
+ defer func() {
+ p.env = pEnv
+ }()
+
+ statements := make([]ast.Expr, 0, 256)
+
+ if p.next().Id != tokens.LBrace {
+ return nil, errors.NewError(errors.ExpectedLBrace, p.prev().Loc, "'{' expected to open block")
+ }
+
+ for !p.check(tokens.RBrace) && !p.end() {
+ decl, err := p.statement()
+ if err != nil {
+ return nil, err
+ }
+ statements = append(statements, decl)
+ }
+
+ next := p.next()
+ if next.Id != tokens.RBrace {
+ // MUST be EOF
+ return nil, errors.NewError(errors.ExpectedRBrace, next.Loc, "'}' expected to close block, end of file found")
+ }
+
+ return ast.Block{Stmts: statements}, nil
+}
+
+func (p *Parser) assign() (ast.Expr, error) {
+ expr, err := p.or()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.check(tokens.Assign) {
+ tok := p.next()
+ value, err := p.expr()
+ if err != nil {
+ return nil, err
+ }
+
+ left := expr
+ env := p.env
+
+ if obj, ok := expr.(ast.GetExpr); ok {
+ oType := obj.Object.Resolve()
+ sEnv, ok := p.structs[oType.Value.Value]
+ if !ok {
+ // shouldn't happen
+ return nil, errors.NewError(errors.Unexpected, oType.Value.Loc, "undeclared struct", oType.String())
+ }
+ env = sEnv
+ expr = obj.Expr
+ }
+
+ v, ok := expr.(ast.Variable)
+ if !ok {
+ return nil, errors.NewError(errors.InvalidTarget, tok.Loc, "invalid assignment target")
+ }
+
+ typ, ok := env.Get(v.Name.Value, false)
+ if !ok {
+ return nil, errors.NewError(errors.UndefinedIdent, v.Name.Loc, "undeclared variable", v.String())
+ }
+ if typ.Value.Id == tokens.TArray && v.Index != nil {
+ typ = *typ.Ret
+ }
+ if !typ.Compare(value.Resolve()) {
+ return nil, errors.NewError(errors.InvalidValue, tok.Loc, "invalid value in assignation, expected", typ.String(), "found", value.Resolve().String())
+ }
+ if typ.Value.Id == tokens.TArray || typ.Value.Id == tokens.TStruct {
+ if !typ.Ref {
+ return nil, errors.NewError(errors.TypeMismatch, v.Name.Loc, "variable cannot be converted to a reference", v.String())
+ }
+ if !typ.ResolveConst() && value.Resolve().ResolveConst() {
+ return nil, errors.NewError(errors.TypeMismatch, v.Name.Loc, "variable cannot be used as referece to constant")
+ }
+ }
+ if typ.Const {
+ return nil, errors.NewError(errors.AssignConst, v.Name.Loc, "value cannot be assigned to constant")
+ }
+ return ast.Assign{Left: left, Right: value, Loc: tok.Loc}, nil
+ }
+
+ return expr, nil
+}
+
+func (p *Parser) or() (ast.Expr, error) {
+ expr, err := p.and()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.check(tokens.Or) {
+ op := p.next()
+ right, err := p.and()
+ if err != nil {
+ return nil, err
+ }
+ expr = ast.Binary{Left: expr, Op: op, Right: right, Type: &ast.TypeBool}
+ }
+
+ return expr, nil
+}
+
+func (p *Parser) and() (ast.Expr, error) {
+ expr, err := p.equality()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.check(tokens.And) {
+ op := p.next()
+ right, err := p.equality()
+ if err != nil {
+ return nil, err
+ }
+ expr = ast.Binary{Left: expr, Op: op, Right: right, Type: &ast.TypeBool}
+ }
+
+ return expr, nil
+}
+
+func (p *Parser) equality() (ast.Expr, error) {
+ expr, err := p.comparison()
+ if err != nil {
+ return nil, err
+ }
+
+ for p.match([]tokens.TokenId{tokens.Eq, tokens.Ne}) {
+ op := p.prev()
+ right, err := p.comparison()
+ if err != nil {
+ return nil, err
+ }
+
+ if !expr.Resolve().Compare(right.Resolve()) {
+ return nil, errors.NewError(errors.TypeMismatch, op.Loc, "type mistmach expected", expr.Resolve().String(), "found", right.Resolve().String())
+ }
+
+ expr = ast.Binary{Left: expr, Op: op, Right: right, Type: &ast.TypeBool}
+ }
+
+ return expr, nil
+}
+
+func (p *Parser) comparison() (ast.Expr, error) {
+ expr, err := p.bitterm()
+ if err != nil {
+ return nil, err
+ }
+
+ for p.match([]tokens.TokenId{tokens.Gt, tokens.Ge, tokens.Lt, tokens.Le}) {
+ op := p.prev()
+ right, err := p.bitterm()
+ if err != nil {
+ return nil, err
+ }
+
+ if !expr.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", expr.Resolve().String())
+ }
+ if !right.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String())
+ }
+
+ expr = ast.Binary{Left: expr, Op: op, Right: right, Type: &ast.TypeBool}
+ }
+
+ return expr, nil
+}
+
+func (p *Parser) bitterm() (ast.Expr, error) {
+ expr, err := p.term()
+ if err != nil {
+ return nil, err
+ }
+
+ for p.match([]tokens.TokenId{tokens.BitShl, tokens.BitShr, tokens.BitOr, tokens.BitXor}) {
+ op := p.prev()
+ right, err := p.term()
+ if err != nil {
+ return nil, err
+ }
+
+ if !expr.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", expr.Resolve().String())
+ }
+ if !right.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String())
+ }
+
+ expr = ast.Binary{Left: expr, Op: op, Right: right}
+ }
+
+ return expr, nil
+}
+
+func (p *Parser) term() (ast.Expr, error) {
+ expr, err := p.bitfactor()
+ if err != nil {
+ return nil, err
+ }
+
+ for p.match([]tokens.TokenId{tokens.Sub, tokens.Add}) {
+ op := p.prev()
+ right, err := p.bitfactor()
+ if err != nil {
+ return nil, err
+ }
+
+ if !expr.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", expr.Resolve().String())
+ }
+ if !right.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String())
+ }
+
+ expr = ast.Binary{Left: expr, Op: op, Right: right}
+ }
+
+ return expr, nil
+}
+
+func (p *Parser) bitfactor() (ast.Expr, error) {
+ expr, err := p.factor()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.check(tokens.BitAnd) {
+ op := p.next()
+ right, err := p.factor()
+ if err != nil {
+ return nil, err
+ }
+
+ if !expr.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", expr.Resolve().String())
+ }
+ if !right.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String())
+ }
+
+ expr = ast.Binary{Left: expr, Op: op, Right: right}
+ }
+
+ return expr, nil
+}
+
+func (p *Parser) factor() (ast.Expr, error) {
+ expr, err := p.unary()
+ if err != nil {
+ return nil, err
+ }
+
+ for p.match([]tokens.TokenId{tokens.Div, tokens.Mul, tokens.Mod}) {
+ op := p.prev()
+ right, err := p.unary()
+ if err != nil {
+ return nil, err
+ }
+
+ if !expr.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", expr.Resolve().String())
+ }
+ if !right.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String())
+ }
+
+ expr = ast.Binary{Left: expr, Op: op, Right: right}
+ }
+
+ return expr, nil
+}
+
+func (p *Parser) unary() (ast.Expr, error) {
+ if p.match([]tokens.TokenId{tokens.TestE, tokens.Not, tokens.Neg, tokens.Sub}) {
+ op := p.prev()
+ right, err := p.unary()
+ if err != nil {
+ return nil, err
+ }
+ if (op.Id == tokens.Sub || op.Id == tokens.Neg) && right.Resolve().Value.Id != tokens.TNumber {
+ return nil, errors.NewError(errors.InvalidOperation, op.Loc, "invalid operation for type", right.Resolve().String())
+ }
+ return ast.Unary{Op: op, Right: right}, nil
+ }
+
+ return p.call()
+}
+
+func (p *Parser) call() (ast.Expr, error) {
+ expr, err := p.primary()
+ if err != nil {
+ return nil, err
+ }
+
+ for {
+ if p.check(tokens.LBracket) {
+ loc := p.next().Loc
+
+ if expr.Resolve().Value.Id != tokens.TArray {
+ return nil, errors.NewError(errors.NotIndexable, loc, "value is not indexable")
+ }
+
+ index, err := p.expr()
+ if err != nil {
+ return nil, err
+ }
+ if !index.Resolve().Compare(&ast.TypeNumber) {
+ return nil, errors.NewError(errors.InvalidValue, p.prev().Loc, "invalid index value")
+ }
+
+ if p.next().Id != tokens.RBracket {
+ return nil, errors.NewError(errors.ExpectedRBracket, p.prev().Loc, "']' expected to close index")
+ }
+
+ switch v := expr.(type) {
+ case ast.Variable:
+ v.Index = index
+ expr = v
+ case ast.GetExpr:
+ val := v.Expr.(ast.Variable)
+ val.Index = index
+ v.Expr = val
+ expr = v
+ default:
+ // shouldn't happen
+ return nil, errors.NewError(errors.Unexpected, loc, "value is not indexable")
+ }
+ continue
+ }
+
+ if p.check(tokens.Dot) {
+ loc := p.next().Loc
+ prop := p.next()
+ if prop.Id != tokens.Ident {
+ return nil, errors.NewError(errors.ExpectedIdent, prop.Loc, "expected identifier")
+ }
+ sType := expr.Resolve()
+ if sType.Value.Id != tokens.TStruct {
+ return nil, errors.NewError(errors.NotStruct, loc, "property get on a non struct value")
+ }
+
+ sEnv, ok := p.structs[sType.Value.Value]
+ if !ok {
+ // shouldn't happen
+ return nil, errors.NewError(errors.Unexpected, sType.Value.Loc, "undeclared struct", sType.String())
+ }
+ pType, ok := sEnv.Get(prop.Value, true)
+ if !ok {
+ return nil, errors.NewError(errors.UndefinedIdent, loc, "property", prop.String(), "not defined in", sType.String())
+ }
+
+ expr = ast.GetExpr{Object: expr, Expr: ast.Variable{Name: prop, Type: pType}}
+ continue
+ }
+
+ if p.check(tokens.LParen) {
+ p.next()
+ args := make([]ast.Expr, 0, 16)
+ if !p.check(tokens.RParen) {
+ for {
+ arg, err := p.expr()
+ if err != nil {
+ return nil, err
+ }
+ args = append(args, arg)
+ if !p.check(tokens.Comma) {
+ break
+ }
+ p.next()
+ }
+ if !p.check(tokens.RParen) {
+ return nil, errors.NewError(errors.ExpectedRParen, p.prev().Loc, "')' expected to end function call")
+ }
+ }
+ typ := expr.Resolve()
+ if typ.Value.Id != tokens.TFunc {
+ return nil, errors.NewError(errors.NotCallable, p.next().Loc, "value is not callable")
+ }
+
+ // mostly for println that doesn't have type checks
+ // and will consume any number of arguments
+ if typ.Params != nil {
+ if len(typ.Params) != len(args) {
+ return nil, errors.NewError(errors.CallError, p.next().Loc, fmt.Sprintf("invalid number of arguments in function call: %d expected %d found", len(typ.Params), len(args)))
+ }
+ for n, arg := range args {
+ if !typ.Params[n].Compare(arg.Resolve()) {
+ return nil, errors.NewError(errors.TypeMismatch, p.next().Loc, fmt.Sprintf("type mismatch in function call argument %d: expected %s found %s", n+1, typ.Params[n].String(), arg.Resolve().String()))
+ }
+ }
+ }
+ expr = ast.Call{Callee: expr, Loc: p.next().Loc, Args: args}
+ continue
+ }
+ break
+ }
+ return expr, nil
+}
+
+func (p *Parser) primary() (ast.Expr, error) {
+ if p.match([]tokens.TokenId{tokens.True, tokens.False}) {
+ return ast.Literal{Value: p.prev(), Type: ast.TypeBool}, nil
+ } else if p.check(tokens.String) {
+ return ast.Literal{Value: p.next(), Type: ast.TypeString}, nil
+ } else if p.check(tokens.Ident) {
+ value := p.next()
+
+ typc, ok := p.env.Get(value.Value, false)
+ if !ok {
+ return nil, errors.NewError(errors.UndefinedIdent, value.Loc, "undeclared variable", value.String())
+ }
+ if typc.Value.Id == tokens.TStruct && !typc.Inst {
+ return nil, errors.NewError(errors.InvalidValue, value.Loc, value.String(), "is not an instance")
+ }
+
+ return ast.Variable{Name: value, Type: typc}, nil
+ } else if p.check(tokens.Char) {
+ value := p.next()
+ value.Id = tokens.Number
+ nval := byte(value.Value[0])
+ return ast.Literal{Value: value, Numval: ast.Number(nval), Type: ast.TypeNumber}, nil
+ } else if p.check(tokens.Number) {
+ value := p.next()
+ nval, err := strconv.ParseInt(value.Value, 0, 64)
+ if err != nil {
+ return nil, errors.Error{
+ Code: errors.FailedNumeric,
+ Message: fmt.Sprintf("[%s] failed to parse numeric value", value.Loc),
+ Err: err,
+ }
+ }
+ return ast.Literal{Value: value, Numval: ast.Number(nval), Type: ast.TypeNumber}, nil
+ } else if p.check(tokens.LParen) {
+ p.next()
+ expr, err := p.expr()
+ if err != nil {
+ return nil, err
+ }
+ next := p.next()
+ if next.Id != tokens.RParen {
+ found := fmt.Sprintf("'%v'", next.Value)
+ if next.Id == tokens.Eof {
+ found = "end of file"
+ }
+ return nil, errors.NewError(errors.ExpectedRParen, next.Loc, "')' expected after expression,", found, "found")
+ }
+ return ast.Group{Expr: expr}, nil
+ }
+ return nil, errors.NewError(errors.ExpectedExpr, p.curr().Loc, "expression expected")
+}
diff --git a/parser/parser_test.go b/parser/parser_test.go
new file mode 100644
index 0000000..8e97146
--- /dev/null
+++ b/parser/parser_test.go
@@ -0,0 +1,250 @@
+package parser
+
+import (
+ "strings"
+ "testing"
+
+ "usebox.net/lang/ast"
+ "usebox.net/lang/errors"
+ "usebox.net/lang/tokenizer"
+)
+
+func parse(input string) ([]ast.Expr, error) {
+ tr := tokenizer.NewTokenizer("-", strings.NewReader(input))
+ toks, err := tr.Scan()
+ if err != nil {
+ return nil, err
+ }
+ p := NewParser(nil)
+ return p.Parse(toks)
+}
+
+func expectError(t *testing.T, input string, code errors.ErrCode) {
+ _, errs := parse(input)
+ if errs == nil {
+ t.Errorf("expected error and it didn't happen (input: %s)", input)
+ }
+ el, ok := errs.(errors.ErrorList)
+ if !ok {
+ t.Errorf("error list expected (input: %s)", input)
+ } else if len(el.Errors) == 0 {
+ t.Errorf("error list is empty (input: %s)", input)
+ } else {
+ if first, ok := el.Errors[0].(errors.Error); !ok {
+ t.Errorf("error in error list not an Error (input: %s)", input)
+ } else {
+ if first.Code != code {
+ t.Errorf("error %d expected, got %d -> %s (input: %s)", code, first.Code, first, input)
+ }
+ }
+ }
+}
+
+func TestErrorFailedNumeric(t *testing.T) {
+ expectError(t, "var n number = 18446744073709551616;", errors.FailedNumeric)
+}
+
+func TestErrorExpectedRParen(t *testing.T) {
+ expectError(t, "var n number = (1 + 2 ;", errors.ExpectedRParen)
+}
+
+func TestErrorExpectedExpr(t *testing.T) {
+ expectError(t, ";", errors.ExpectedExpr)
+}
+
+func TestErrorExpectedVarName(t *testing.T) {
+ expectError(t, "var number = 1;", errors.ExpectedIdent)
+ expectError(t, "const false;", errors.ExpectedIdent)
+}
+
+func TestErrorExpectedType(t *testing.T) {
+ expectError(t, "var n = 1;", errors.ExpectedType)
+ expectError(t, "const n = 1;", errors.ExpectedType)
+ expectError(t, "def fn(n) { }", errors.ExpectedType)
+ expectError(t, "def fn() { }\nvar a number = fn();", errors.TypeMismatch)
+}
+
+func TestErrorDeclareUndefined(t *testing.T) {
+ expectError(t, "var a number = a;", errors.UndefinedIdent)
+}
+
+func TestErrorExpectedSemiColon(t *testing.T) {
+ expectError(t, "var n number = 1", errors.ExpectedSemicolon)
+ expectError(t, "1 + 1", errors.ExpectedSemicolon)
+}
+
+func TestErrorExpectedFunc(t *testing.T) {
+ expectError(t, "def ;", errors.ExpectedIdent)
+ expectError(t, "def fn;", errors.ExpectedLParen)
+ expectError(t, "def fn(number);", errors.ExpectedIdent)
+ expectError(t, "def fn(a number, number);", errors.ExpectedIdent)
+ expectError(t, "def fn(a, b);", errors.ExpectedType)
+ expectError(t, "def fn(a bool, b);", errors.ExpectedType)
+ expectError(t, "def fn() id;", errors.ExpectedType)
+ expectError(t, "def fn(a number) id;", errors.ExpectedType)
+ expectError(t, "def fn()", errors.ExpectedType)
+ expectError(t, "def fn() {", errors.ExpectedRBrace)
+ expectError(t, "def fn() number {", errors.ExpectedRBrace)
+ expectError(t, "fn(;", errors.UndefinedIdent)
+ expectError(t, "fn(0;", errors.UndefinedIdent)
+ expectError(t, "fn()", errors.UndefinedIdent)
+ expectError(t, "def fn() bool { return 1; }", errors.TypeMismatch)
+ expectError(t, "def fn() { }\ndef fn() { }", errors.AlreadyDeclared)
+}
+
+func TestErrorReturn(t *testing.T) {
+ expectError(t, "return", errors.InvalidOperation)
+ expectError(t, "return true", errors.InvalidOperation)
+ expectError(t, "def fn() { return }", errors.ExpectedExpr)
+ expectError(t, "def fn() { return true }", errors.ExpectedSemicolon)
+ expectError(t, `
+ def fn() [5]number {
+ var local [5]number;
+ return local;
+ }
+ `, errors.InvalidOperation)
+ expectError(t, `
+ def A { var p number; }
+ def fn() A {
+ var local A;
+ return local;
+ }
+ `, errors.InvalidOperation)
+ expectError(t, `
+ def A { var p number; }
+ var g A;
+ def fn() A {
+ var local A;
+ local = g;
+ return local;
+ }
+ "OK";
+ `, errors.TypeMismatch)
+ expectError(t, `
+ var g [3]bool;
+ def fn() [3]bool {
+ var local [3]bool;
+ local = g;
+ return local;
+ }
+ "OK";
+ `, errors.TypeMismatch)
+ expectError(t, `
+ const a [3]number = [1, 2, 3];
+ var b [3]number = a;
+ `, errors.TypeMismatch)
+ expectError(t, `
+ const a [3]number = [1, 2, 3];
+ var b [3]number;
+ var c [3]number = b;
+ c = a;
+ `, errors.TypeMismatch)
+}
+
+func TestErrorExpectedAssign(t *testing.T) {
+ expectError(t, "a = ", errors.UndefinedIdent)
+ expectError(t, "1 = 1;", errors.InvalidTarget)
+ expectError(t, "const a number = 0; a = 1;", errors.AssignConst)
+ expectError(t, "const a [2]number = [0, 1]; a[0] = 1;", errors.AssignConst)
+ expectError(t, `
+ def fn() { }
+ const a func () = fn;
+ a = fn;
+ `, errors.AssignConst)
+}
+
+func TestErrorLoops(t *testing.T) {
+ expectError(t, "continue;", errors.InvalidOperation)
+ expectError(t, "break;", errors.InvalidOperation)
+}
+
+func TestErrorArray(t *testing.T) {
+ expectError(t, "var a [false]number;", errors.InvalidValue)
+ expectError(t, "var a [1]number = 1;", errors.TypeMismatch)
+ expectError(t, "var a []number = [1, 2, 3];", errors.ExpectedExpr)
+ expectError(t, "var a [2]number; var b [5]number = a;", errors.TypeMismatch)
+}
+
+func TestErrorExprList(t *testing.T) {
+ expectError(t, "var a number = [1];", errors.InvalidValue)
+ expectError(t, "var a [1]number = [];", errors.InvalidValue)
+ expectError(t, "var a [2]number = [1, false];", errors.TypeMismatch)
+ expectError(t, "var a [2]number = [1, 2, 3];", errors.InvalidValue)
+ expectError(t, "var a [1]number = [false];", errors.TypeMismatch)
+ expectError(t, "const a number = [1];", errors.InvalidValue)
+ expectError(t, "const a [1]number = [];", errors.InvalidValue)
+ expectError(t, "const a [2]number = [1, false];", errors.TypeMismatch)
+ expectError(t, "const a [2]number = [1, 2, 3];", errors.InvalidValue)
+ expectError(t, "const a [1]number = [false];", errors.TypeMismatch)
+}
+
+func TestErrorIndex(t *testing.T) {
+ expectError(t, "var a [10]number; a[false];", errors.InvalidValue)
+ expectError(t, "var a [10]number; a[\"bad\"];", errors.InvalidValue)
+ expectError(t, "var i bool; var a [10]number; a[i];", errors.InvalidValue)
+ expectError(t, "def fn() { } var a [10]number; a[fn];", errors.InvalidValue)
+ expectError(t, "def fn() bool { return false; } var a [10]number; a[fn()];", errors.InvalidValue)
+ expectError(t, "false[0];", errors.NotIndexable)
+ expectError(t, "10[0];", errors.NotIndexable)
+ expectError(t, "false[0] = 10;", errors.NotIndexable)
+}
+
+func TestErrorFor(t *testing.T) {
+ expectError(t, "for a { }", errors.UndefinedIdent)
+ expectError(t, "for a != 0 { }", errors.UndefinedIdent)
+ expectError(t, "for a in { }", errors.ExpectedExpr)
+ expectError(t, "for a in a { }", errors.UndefinedIdent)
+ expectError(t, "var a number; for i in a { }", errors.TypeMismatch)
+}
+
+func TestErrorStruct(t *testing.T) {
+ expectError(t, "def A { }", errors.InvalidValue)
+ expectError(t, "var a number; a.b;", errors.NotStruct)
+ expectError(t, "def A { var a number; }\nvar a A; a.1;", errors.ExpectedIdent)
+ expectError(t, "def A { var a number; }\ndef A { }", errors.AlreadyDeclared)
+ expectError(t, "def A { var a A; }", errors.RecursiveStruct)
+ expectError(t, "def A { var a number; }\nvar a A;\na.x;", errors.UndefinedIdent)
+ expectError(t, "def A { var a number; }\nA.a;", errors.InvalidValue)
+ expectError(t, "def A { var a number; }\nvar a number; a = A;", errors.InvalidValue)
+ expectError(t, "def A { var a number; }\nvar a A; var b number = a;", errors.TypeMismatch)
+ expectError(t, "def A { var a number; }\nvar a A; var b a;", errors.ExpectedType)
+ expectError(t, `
+ def A { var a number; }
+ def B { var a number; }
+ var a A;
+ var b B = a;
+ `, errors.TypeMismatch)
+ expectError(t, `
+ def fn() {
+ def A {
+ return;
+ }
+ }
+ `, errors.ExpectedDefinition)
+ expectError(t, `
+ for {
+ def A {
+ break;
+ }
+ }
+ `, errors.ExpectedDefinition)
+ expectError(t, `
+ for {
+ def A {
+ continue;
+ }
+ }
+ `, errors.ExpectedDefinition)
+ expectError(t, `
+ def A {
+ var p number;
+ p = 10;
+ }
+ `, errors.ExpectedDefinition)
+ expectError(t, `
+ def A {
+ var p number;
+ println("error");
+ }
+ `, errors.ExpectedDefinition)
+}
diff --git a/tokenizer/tokenizer.go b/tokenizer/tokenizer.go
new file mode 100644
index 0000000..78616c6
--- /dev/null
+++ b/tokenizer/tokenizer.go
@@ -0,0 +1,356 @@
+package tokenizer
+
+import (
+ "bufio"
+ goerr "errors"
+ "fmt"
+ "io"
+ "strconv"
+ "strings"
+ "unicode"
+
+ "usebox.net/lang/errors"
+ "usebox.net/lang/tokens"
+)
+
+type rTokenIdMap map[rune]tokens.TokenId
+type sTokenIdMap map[string]tokens.TokenId
+
+var (
+ keywords = sTokenIdMap{
+ "true": tokens.True,
+ "false": tokens.False,
+ "var": tokens.Var,
+ "const": tokens.Const,
+ "def": tokens.Def,
+ "return": tokens.Return,
+ "number": tokens.TNumber,
+ "bool": tokens.TBool,
+ "string": tokens.TString,
+ "func": tokens.TFunc,
+ "if": tokens.If,
+ "else": tokens.Else,
+ "for": tokens.For,
+ "in": tokens.In,
+ "continue": tokens.Continue,
+ "break": tokens.Break,
+ }
+
+ doubleChar = map[rune]struct {
+ second rTokenIdMap
+ }{
+ '|': {rTokenIdMap{'|': tokens.Or}},
+ '&': {rTokenIdMap{'&': tokens.And}},
+ '=': {rTokenIdMap{'=': tokens.Eq}},
+ '!': {rTokenIdMap{'=': tokens.Ne, '?': tokens.TagE}},
+ '>': {rTokenIdMap{'=': tokens.Ge, '>': tokens.BitShr}},
+ '<': {rTokenIdMap{'=': tokens.Le, '<': tokens.BitShl}},
+ }
+
+ singleChar = rTokenIdMap{
+ '(': tokens.LParen,
+ ')': tokens.RParen,
+ '+': tokens.Add,
+ '-': tokens.Sub,
+ '*': tokens.Mul,
+ '%': tokens.Mod,
+ '/': tokens.Div,
+ '>': tokens.Gt,
+ '<': tokens.Lt,
+ '!': tokens.Not,
+ '~': tokens.Neg,
+ '|': tokens.BitOr,
+ '&': tokens.BitAnd,
+ '^': tokens.BitXor,
+ '=': tokens.Assign,
+ '.': tokens.Dot,
+ ';': tokens.Semicolon,
+ ',': tokens.Comma,
+ '{': tokens.LBrace,
+ '}': tokens.RBrace,
+ '[': tokens.LBracket,
+ ']': tokens.RBracket,
+ '?': tokens.TestE,
+ }
+)
+
+type Tokenizer struct {
+ input bufio.Reader
+
+ loc tokens.Location
+}
+
+func NewTokenizer(filename string, input io.Reader) *Tokenizer {
+ return &Tokenizer{input: *bufio.NewReader(input), loc: tokens.Location{File: filename, Line: 1, Column: 1}}
+}
+
+func (t *Tokenizer) peek() (rune, error) {
+ r, _, err := t.input.ReadRune()
+ if err != nil {
+ t.input.UnreadRune()
+ return rune(0), err
+ }
+ if err := t.input.UnreadRune(); err != nil {
+ return rune(0), err
+ }
+ return r, nil
+}
+
+func (t *Tokenizer) read() (rune, error) {
+ r, _, err := t.input.ReadRune()
+ if err != nil {
+ return rune(0), err
+ }
+ return r, nil
+}
+
+func (t *Tokenizer) unread() error {
+ return t.input.UnreadRune()
+}
+
+func (t *Tokenizer) skipWhitespace() (rune, error) {
+ var r rune
+ var err error
+loop:
+ for {
+ r, err = t.read()
+ if err != nil {
+ break
+ }
+ switch {
+ case !unicode.IsSpace(r):
+ break loop
+ case r == '\n':
+ t.loc.Eol()
+ default:
+ t.loc.Inc()
+ }
+ }
+ return r, err
+}
+
+func (t *Tokenizer) extract(filter func(rune) bool) (string, error) {
+ var buf strings.Builder
+ for {
+ r, err := t.peek()
+ // EOF, error or end of extraction
+ if err == io.EOF || !filter(r) {
+ t.loc.Add(buf.Len())
+ return buf.String(), nil
+ } else if err != nil {
+ return "", err
+ } else {
+ t.read()
+ }
+ if _, err = buf.WriteRune(r); err != nil {
+ return "", err
+ }
+ }
+}
+
+func (t *Tokenizer) extractEscaped(end rune) (string, error) {
+ var buf strings.Builder
+ for {
+ r, err := t.read()
+ t.loc.Inc()
+ if r == end {
+ return buf.String(), nil
+ } else if err == io.EOF {
+ return "", fmt.Errorf("EOF found before closing '%c'", end)
+ } else if err != nil {
+ return "", err
+ } else {
+ if r == '\\' {
+ r, err = t.read()
+ if err == io.EOF {
+ return "", goerr.New("EOF found before completing escape sequence")
+ } else if err != nil {
+ return "", err
+ }
+ switch r {
+ case 'n':
+ r = '\n'
+ case 't':
+ r = '\t'
+ case '\\', end:
+ case 'x':
+ count := 0
+ value, err := t.extract(func(r rune) bool {
+ defer func() {
+ count++
+ }()
+ return unicode.In(r, unicode.ASCII_Hex_Digit) && count != 2
+ })
+ if err != nil {
+ return "", err
+ }
+ if len(value) != 2 {
+ return "", goerr.New("invalid escape sequence")
+ }
+ nval, err := strconv.ParseInt(value, 16, 8)
+ if err != nil {
+ return "", goerr.New("invalid escape sequence")
+ }
+ r = rune(byte(nval))
+ default:
+ return "", goerr.New("invalid escape sequence")
+ }
+ t.loc.Inc()
+ }
+ }
+ if _, err = buf.WriteRune(r); err != nil {
+ return "", err
+ }
+ }
+}
+
+func (t *Tokenizer) token() (tokens.Token, error) {
+ var tok tokens.Token
+ var r rune
+ var err error
+ var loc tokens.Location
+
+ for {
+ r, err = t.skipWhitespace()
+ loc = t.loc
+ if err == io.EOF {
+ return tokens.Token{Id: tokens.Eof, Loc: loc}, nil
+ } else if err != nil {
+ return tok, err
+ }
+
+ // comments
+ if r == '/' {
+ if p, err := t.peek(); err == nil && p == '/' {
+ for {
+ r, err = t.read()
+ if err != nil {
+ // could be EOF
+ break
+ }
+ if r == '\n' {
+ t.loc.Eol()
+ break
+ }
+ }
+ // check for whitespace
+ continue
+ }
+ }
+ // not a comment, skipped whitespace,
+ // now process this token
+ break
+ }
+
+ // identifier or keywords
+ if unicode.IsLetter(r) || r == '_' {
+ t.unread()
+ value, err := t.extract(func(r rune) bool {
+ return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_'
+ })
+ if err != nil {
+ return tok, errors.NewErrorWrap(errors.SyntaxError, t.loc, err, err.Error())
+ }
+ // match keywords
+ if tokenId, ok := keywords[value]; ok {
+ return tokens.Token{Id: tokenId, Loc: loc, Value: value}, nil
+ }
+ // otherwise is an identitier
+ return tokens.Token{Id: tokens.Ident, Loc: loc, Value: value}, nil
+ }
+
+ // character literal
+ if r == '\'' {
+ t.loc.Inc()
+ value, err := t.extractEscaped('\'')
+ if err != nil {
+ return tok, errors.NewErrorWrap(errors.SyntaxError, t.loc, err, err.Error())
+ }
+ if len(value) != 1 {
+ return tok, errors.NewError(errors.SyntaxError, t.loc, "invalid character literal")
+ }
+ return tokens.Token{Id: tokens.Char, Loc: loc, Value: value}, nil
+ }
+
+ // numeric literal
+ if unicode.IsDigit(r) {
+ t.unread()
+ pos := 0
+ hex := false
+ bin := false
+ value, err := t.extract(func(r rune) bool {
+ if pos == 1 && r == 'x' {
+ hex = true
+ pos++
+ return true
+ }
+ if pos == 1 && r == 'b' {
+ bin = true
+ pos++
+ return true
+ }
+ if hex && unicode.In(r, unicode.ASCII_Hex_Digit) {
+ pos++
+ return true
+ }
+ if bin && r != '0' && r != '1' {
+ return false
+ }
+ pos++
+ return unicode.IsDigit(r)
+ })
+ if (hex || bin) && len(value) == 2 {
+ err = fmt.Errorf("invalid numeric format '%s'", value)
+ }
+ if err != nil {
+ return tok, errors.NewErrorWrap(errors.SyntaxError, t.loc, err, err.Error())
+ }
+ return tokens.Token{Id: tokens.Number, Loc: loc, Value: value}, nil
+ }
+
+ // string literal
+ if r == '"' {
+ t.loc.Inc()
+ value, err := t.extractEscaped('"')
+ if err != nil {
+ return tok, errors.NewErrorWrap(errors.SyntaxError, t.loc, err, err.Error())
+ }
+ return tokens.Token{Id: tokens.String, Loc: loc, Value: value}, nil
+ }
+
+ // two character symbols
+ if d, ok := doubleChar[r]; ok {
+ if p, err := t.peek(); err == nil {
+ if second, ok := d.second[p]; ok {
+ t.read()
+ t.loc.Add(2)
+ // FIXME: value
+ return tokens.Token{Id: second, Loc: loc, Value: string(r) + string(p)}, nil
+ }
+ }
+ }
+
+ // single character symbols
+ if tokenId, ok := singleChar[r]; ok {
+ t.loc.Inc()
+ return tokens.Token{Id: tokenId, Loc: loc, Value: string(r)}, nil
+ }
+
+ // invalid token
+ return tok, errors.NewError(errors.SyntaxError, t.loc, "invalid character:", fmt.Sprintf("'%c'", r))
+}
+
+func (t *Tokenizer) Scan() ([]tokens.Token, error) {
+ var ts []tokens.Token
+
+ for {
+ token, err := t.token()
+ if err != nil {
+ return nil, err
+ }
+ ts = append(ts, token)
+ if token.Id == tokens.Eof {
+ return ts, nil
+ }
+ }
+}
diff --git a/tokenizer/tokenizer_test.go b/tokenizer/tokenizer_test.go
new file mode 100644
index 0000000..47afebb
--- /dev/null
+++ b/tokenizer/tokenizer_test.go
@@ -0,0 +1,310 @@
+package tokenizer
+
+import (
+ "strings"
+ "testing"
+
+ "usebox.net/lang/tokens"
+)
+
+func TestSkipWhitespace(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader(" \t\r\n"))
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tokens.Eof {
+ t.Errorf("Eof expected, got %s", tok)
+ }
+ if tok.Loc.Line != 2 {
+ t.Errorf("line == 2 expected, got %s", tok.Loc)
+ }
+ if tok.Loc.Column != 1 {
+ t.Errorf("column == 1 expected, got %s", tok.Loc)
+ }
+}
+
+func TestComments(t *testing.T) {
+ for _, tt := range []struct {
+ name string
+ input string
+ line int
+ col int
+ }{
+ {"single comment", "// a comment\n", 2, 1},
+ {"ignore tokens", "// 1 + 2 is ignored\n", 2, 1},
+ {"skip whitespace", " // a comment\n", 2, 1},
+ {"comment to eof", "\n// comment to eof", 2, 1},
+ {"multiple comments", "// comment\n// another comment", 2, 1},
+ {"whitespace before comment", "\t// comment with whitespace\n\t// comment\n", 3, 1},
+ {"unicode", "// こんにちは\n", 2, 1},
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader(tt.input))
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tokens.Eof {
+ t.Errorf("Eof expected, got %s", tok)
+ }
+ if tok.Loc.Line != tt.line {
+ t.Errorf("line == %d expected, got %s", tt.line, tok.Loc)
+ }
+ if tok.Loc.Column != tt.col {
+ t.Errorf("column == %d expected, got %s", tt.col, tok.Loc)
+ }
+ })
+ }
+}
+
+func TestIdent(t *testing.T) {
+ for _, tt := range []string{
+ "ident",
+ "MyIdent",
+ "ident2",
+ "名前",
+ "__add",
+ } {
+ t.Run(tt, func(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader(tt))
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tokens.Ident {
+ t.Errorf("Ident expected, got %s", tok)
+ }
+ if tok.Value != tt {
+ t.Errorf("value == %s expected, got %s", tt, tok.Value)
+ }
+ })
+ }
+}
+
+func TestMultipleCalls(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader("// comment\nident // with a comment\nanother\n"))
+ for _, tt := range []string{
+ "ident", "another",
+ } {
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tokens.Ident {
+ t.Errorf("Ident expected, got %s", tok)
+ }
+ if tok.Value != tt {
+ t.Errorf("value == %s expected, got %s", tt, tok.Value)
+ }
+ }
+}
+
+func TestKeywords(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader("true false var const def return number bool string func if else for in continue break"))
+ for _, tt := range []tokens.TokenId{
+ tokens.True,
+ tokens.False,
+ tokens.Var,
+ tokens.Const,
+ tokens.Def,
+ tokens.Return,
+ tokens.TNumber,
+ tokens.TBool,
+ tokens.TString,
+ tokens.TFunc,
+ tokens.If,
+ tokens.Else,
+ tokens.For,
+ tokens.In,
+ tokens.Continue,
+ tokens.Break,
+ } {
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tt {
+ t.Errorf("%s expected, got %s", tt, tok)
+ }
+ }
+}
+
+func TestNumber(t *testing.T) {
+ for _, tt := range []string{
+ "1234",
+ "0x4d2",
+ "0b10011010010",
+ } {
+ tzr := NewTokenizer("-", strings.NewReader(tt))
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tokens.Number {
+ t.Errorf("Number expected, got %s", tok)
+ }
+ if tok.Value != tt {
+ t.Errorf("value == %s expected, got '%s'", tt, tok.Value)
+ }
+ }
+}
+
+func TestCharacter(t *testing.T) {
+ for tt, e := range map[string]string{
+ "'a'": "a",
+ "'0'": "0",
+ "'\\''": "'",
+ "' '": " ",
+ "'\\n'": "\n",
+ "'\\x0d'": "\r",
+ } {
+ tzr := NewTokenizer("-", strings.NewReader(tt))
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tokens.Char {
+ t.Errorf("Number expected, got %s", tok)
+ }
+ if tok.Value != e {
+ t.Errorf("value == %s expected, got '%s'", e, tok.Value)
+ }
+ }
+}
+
+func TestErrorCharacter(t *testing.T) {
+ for _, tt := range []string{
+ "'12'",
+ "''",
+ "'\\'",
+ "'A",
+ "'世'",
+ "'\\x0'",
+ } {
+ tzr := NewTokenizer("-", strings.NewReader(tt))
+ _, err := tzr.token()
+ if err == nil {
+ t.Errorf("expected error, didn't happen (input: %s)", tt)
+ }
+ }
+}
+
+func TestString(t *testing.T) {
+ for tt, v := range map[string]string{
+ "\"this is a string\"": "this is a string",
+ "\"0.1234\"": "0.1234",
+ "\"\\\"escaped\\\" string\"": "\"escaped\" string",
+ "\"\\n\\x0d\\t\"": "\n\r\t",
+ "\"Multiline\\nstring\"": "Multiline\nstring",
+ } {
+ t.Run(tt, func(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader(tt))
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tokens.String {
+ t.Errorf("String expected, got \"%s\"", tok)
+ }
+ if tok.Value != v {
+ t.Errorf("value == %s expected, got \"%s\"", tt, tok.Value)
+ }
+ })
+ }
+}
+
+func TestErrorStrnig(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader("\"string and EOF"))
+ _, err := tzr.token()
+ if err == nil {
+ t.Errorf("expected error, didn't happen")
+ }
+}
+
+func TestSingleChar(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader("{ } ( ) [ ] ; , + - * % / . = > < ! ~ | & ^ ?"))
+ for _, tt := range []tokens.TokenId{
+ tokens.LBrace,
+ tokens.RBrace,
+ tokens.LParen,
+ tokens.RParen,
+ tokens.LBracket,
+ tokens.RBracket,
+ tokens.Semicolon,
+ tokens.Comma,
+ tokens.Add,
+ tokens.Sub,
+ tokens.Mul,
+ tokens.Mod,
+ tokens.Div,
+ tokens.Dot,
+ tokens.Assign,
+ tokens.Gt,
+ tokens.Lt,
+ tokens.Not,
+ tokens.Neg,
+ tokens.BitOr,
+ tokens.BitAnd,
+ tokens.BitXor,
+ tokens.TestE,
+ } {
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tt {
+ t.Errorf("%s expected, got %s", tt, tok)
+ }
+ }
+}
+
+func TestDoubleChar(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader("|| && == != >= <= >> << !?"))
+ for _, tt := range []tokens.TokenId{
+ tokens.Or,
+ tokens.And,
+ tokens.Eq,
+ tokens.Ne,
+ tokens.Ge,
+ tokens.Le,
+ tokens.BitShr,
+ tokens.BitShl,
+ tokens.TagE,
+ } {
+ tok, err := tzr.token()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if tok.Id != tt {
+ t.Errorf("%s expected, got %s", tt, tok)
+ }
+ }
+}
+
+func TestScan(t *testing.T) {
+ for _, tt := range []struct {
+ name string
+ input string
+ ntokens int
+ }{
+ {"single line", "1 + 2", 4},
+ {"multiple lines", "1 + 2\nident", 5},
+ {"line starts with whitespace", "1 + 2\n\tident", 5},
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ tzr := NewTokenizer("-", strings.NewReader(tt.input))
+ ts, err := tzr.Scan()
+ if err != nil {
+ t.Errorf("unexpected error %s", err)
+ }
+ if len(ts) != tt.ntokens {
+ t.Errorf("%d tokens expected, got %d", tt.ntokens, len(ts))
+ }
+ last := ts[len(ts)-1]
+ if last.Id != tokens.Eof {
+ t.Errorf("last token expected to be Eof, got %s", last)
+ }
+ })
+ }
+}
diff --git a/tokens/tokens.go b/tokens/tokens.go
new file mode 100644
index 0000000..e063730
--- /dev/null
+++ b/tokens/tokens.go
@@ -0,0 +1,186 @@
+package tokens
+
+import (
+ "encoding/json"
+ "fmt"
+)
+
+type TokenId int
+
+func (t TokenId) String() string {
+ return tokenStr[t]
+}
+
+func (t TokenId) MarshalJSON() ([]byte, error) {
+ return json.Marshal(t.String())
+}
+
+const (
+ Eof TokenId = iota
+ Ident
+
+ None
+ True
+ False
+ Var
+ Const
+ Def
+ TNumber
+ TBool
+ TString
+ TFunc
+ TStruct
+ TArray
+ Return
+ If
+ Else
+ For
+ In
+ Continue
+ Break
+
+ Number
+ Char
+ String
+
+ Or
+ And
+ Eq
+ Ne
+ Ge
+ Le
+ BitShr
+ BitShl
+ TagE
+
+ LBrace
+ RBrace
+ LParen
+ RParen
+ LBracket
+ RBracket
+ Semicolon
+ Comma
+ Add
+ Sub
+ Mul
+ Mod
+ Div
+ Neg
+ Dot
+ Assign
+ Gt
+ Lt
+ Not
+ BitOr
+ BitAnd
+ BitXor
+ TestE
+)
+
+var (
+ tokenStr = []string{
+ "Eof",
+ "Ident",
+
+ // keywords
+ "none",
+ "True",
+ "False",
+ "Var",
+ "Const",
+ "Def",
+ "number",
+ "bool",
+ "string",
+ "func",
+ "struct",
+ "array",
+ "Return",
+ "If",
+ "Else",
+ "For",
+ "In",
+ "Continue",
+ "Break",
+
+ // literals
+ "Number",
+ "Character",
+ "String",
+
+ // double char
+ "Or",
+ "And",
+ "Eq",
+ "NE",
+ "Ge",
+ "Le",
+ "BitShr",
+ "BitShl",
+ "TagE",
+
+ // single character
+ "LBrace",
+ "RBrace",
+ "LParen",
+ "RParen",
+ "LBracket",
+ "RBracket",
+ "Semicolon",
+ "Comma",
+ "Add",
+ "Sub",
+ "Mul",
+ "Mod",
+ "Div",
+ "Neg",
+ "Dot",
+ "Assign",
+ "Gt",
+ "Lt",
+ "Not",
+ "BitOr",
+ "BitAnd",
+ "BitXor",
+ "TestE",
+ }
+)
+
+type Location struct {
+ File string `json:"file"`
+ Line int `json:"line"`
+ Column int `json:"col"`
+}
+
+func (l Location) String() string {
+ return fmt.Sprintf("%s:%d:%d", l.File, l.Line, l.Column)
+}
+
+func (l *Location) Inc() {
+ l.Column++
+}
+
+func (l *Location) Add(cols int) {
+ l.Column += cols
+}
+
+func (l *Location) Eol() {
+ l.Line++
+ l.Column = 1
+}
+
+type Token struct {
+ Id TokenId `json:"id"`
+ Loc Location `json:"loc"`
+ Value string `json:"val,omitempty"`
+}
+
+func (t Token) String() string {
+ switch t.Id {
+ case Ident:
+ return fmt.Sprintf("'%s'", t.Value)
+ default:
+ return fmt.Sprintf("%s at %s", t.Id, t.Loc)
+ }
+}