From 65c8beecb14f6d09c49504d74beedd58cc7ddd17 Mon Sep 17 00:00:00 2001 From: "Juan J. Martinez" Date: Wed, 7 Sep 2022 16:26:50 +0100 Subject: Better project layout, removed warnings --- src/Micro/Asm/Sdcc.hs | 1 + src/Micro/Ast.hs | 46 +++++++++++ src/Micro/Compiler.hs | 202 +++++++++++++++++++++++++++++++++++++++++++++ src/Micro/Env.hs | 63 ++++++++++++++ src/Micro/Error.hs | 39 +++++++++ src/Micro/Lexer.hs | 58 +++++++++++++ src/Micro/Parser.hs | 223 ++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 632 insertions(+) create mode 100644 src/Micro/Asm/Sdcc.hs create mode 100644 src/Micro/Ast.hs create mode 100644 src/Micro/Compiler.hs create mode 100644 src/Micro/Env.hs create mode 100644 src/Micro/Error.hs create mode 100644 src/Micro/Lexer.hs create mode 100644 src/Micro/Parser.hs (limited to 'src/Micro') diff --git a/src/Micro/Asm/Sdcc.hs b/src/Micro/Asm/Sdcc.hs new file mode 100644 index 0000000..60a2caf --- /dev/null +++ b/src/Micro/Asm/Sdcc.hs @@ -0,0 +1 @@ +module Micro.Asm.Sdcc where diff --git a/src/Micro/Ast.hs b/src/Micro/Ast.hs new file mode 100644 index 0000000..45697de --- /dev/null +++ b/src/Micro/Ast.hs @@ -0,0 +1,46 @@ +module Micro.Ast where + +import Data.List (intercalate) +import Text.Parsec (SourcePos) + +type Ident = String + +data Type = Type String | FuncType [Type] (Maybe Type) deriving (Eq, Ord) + +instance Show Type where + show (Type t) = t + show (FuncType params rtyp) = + "(" ++ (intercalate ", " (fmap show params)) ++ ") -> " ++ case rtyp of + Just t -> show t + Nothing -> "()" + +showList :: [Type] -> String +showList xs = intercalate ", " $ fmap show xs + +type FuncParam = (Ident, Type, Bool, SourcePos) + +data Expr + = Num Integer SourcePos + | Bool' Bool SourcePos + | BinOp Op SourcePos Expr Expr + | Variable Ident SourcePos + | -- v type value private pos + Var Ident Type Expr Bool SourcePos + | -- fn [params] return body private anomyous pos + Func Ident [FuncParam] (Maybe Type) [Expr] Bool Bool SourcePos + | Call Expr [Expr] SourcePos + | Return (Maybe Expr) SourcePos + | Module String SourcePos + deriving (Eq, Ord, Show) + +data Op + = Assign + | Plus + | Minus + | Mul + | Div + deriving (Eq, Ord, Show) + +toFuncType :: [FuncParam] -> Maybe Type -> Type +toFuncType params rtyp = + FuncType (map (\(_, t, _, _) -> t) params) rtyp diff --git a/src/Micro/Compiler.hs b/src/Micro/Compiler.hs new file mode 100644 index 0000000..90e710b --- /dev/null +++ b/src/Micro/Compiler.hs @@ -0,0 +1,202 @@ +module Micro.Compiler where + +import Control.Monad.State +import Data.Either (rights) +import Data.Maybe (catMaybes, fromMaybe) +import qualified Micro.Ast as A +import Micro.Env +import Micro.Error +import Text.Parsec (SourcePos) + +type CompState = (Env, [Error]) + +type CompResult = Either [Error] (Maybe A.Type) + +startState :: CompState +startState = (emptyEnv, []) + +-- | @foldlEither fn init xs@ folds left @xs@ applying a function @fn@ that +-- returns either, returning accumulated right and the collected lefts as a +-- list. +foldlEither :: (accr -> b -> Either accl accr) -> (accr, [accl]) -> [b] -> (accr, [accl]) +foldlEither fn init xs = + foldl + ( \(r, l) x -> case fn r x of + Left e -> (r, e : l) + Right x -> (x, l) + ) + init + xs + +-- | @addError error@ adds @error@ to the state and returns no type to allow +-- the compilation to continue. +addError :: Error -> State CompState CompResult +addError e = do + (ev, errs) <- get + put (ev, e : errs) + pure $ Right Nothing + +-- | @typecheckCall args params@ resolves @args@ and compares it with @params@, +-- returning a string describing an error or Nothing in case of type match. +typecheckCall :: [A.Expr] -> [A.Type] -> State CompState (Maybe String) +typecheckCall args params + | length args /= length params = return $ Just "invalid number of arguments in function call" + | length params == 0 = pure Nothing + | otherwise = do + -- resolve all args types + targs <- fmap rights $ traverse compile args + case sequence targs of + Just t + -- after resolution, we could have less "rights" + | length t /= length params -> return $ Nothing + -- compare types + | all (\(a, b) -> a == b) $ zip t params -> + return $ Nothing -- all good! + | otherwise -> return $ Just ("type mismatch in function call\n found: " ++ A.showList t ++ "\n expected: " ++ A.showList params) + Nothing -> + -- there was an error in on argument + pure Nothing + +showMaybet :: Maybe A.Type -> String +showMaybet Nothing = "()" +showMaybet (Just t) = show t + +-- | @typecheckVal value typ@ resolves @value@ and compares it to @typ@ type, +-- returning a string describing an error or Nothing in case of type match. +typecheckVal :: A.Expr -> Maybe A.Type -> State CompState (Maybe String) +typecheckVal value typ = do + r <- compile value + case r of + Right r + | r == typ -> pure Nothing + | otherwise -> return $ Just $ "type mismatch\n found: " ++ showMaybet r ++ "\n expected: " ++ showMaybet typ + Left _ -> pure Nothing -- error resolving value + +-- | @typecheckReturn value fret@ resolves @value@ and compares it with @fret@, +-- returning a string decribing an error or Nothing in case of a type match. +typecheckReturn :: Maybe A.Expr -> Maybe A.Type -> State CompState (Maybe String) +typecheckReturn Nothing Nothing = return $ Nothing +typecheckReturn Nothing fret = return $ Just $ "invalid return value\n found: ()\n expected: " ++ showMaybet fret +typecheckReturn (Just value) fret = typecheckVal value fret + +-- | @typecheckBinOp a b pos@ resolves @a@ (left) and compares it to the type +-- of @b@ via typecheckVal. +typecheckBinOp :: A.Expr -> A.Expr -> SourcePos -> State CompState CompResult +typecheckBinOp a b pos = do + l <- compile a + case l of + Right tl -> do + tr <- typecheckVal b $ tl + case tr of + Just err -> addError $ Error TypeError err pos + Nothing -> return $ Right $ tl + _ -> return $ Right Nothing -- error resolving left + +-- built-in types +types :: [String] +types = ["bool", "u8", "s8", "u16", "s16"] + +definedType :: A.Type -> Bool +definedType (A.Type t) = t `elem` types +definedType (A.FuncType ts r) = + all definedType ts && fromMaybe False (fmap definedType r) + +verifyFuncType :: String -> [A.FuncParam] -> Maybe A.Type -> SourcePos -> [Error] +verifyFuncType ident params ret pos = do + ( catMaybes $ + map + ( \(id, t, _, pos) -> + if not (definedType t) + then Just $ Error UndefinedType ("undefined type in function declaration \"" ++ id ++ "\"") pos + else Nothing + ) + params + ) + ++ if not (fromMaybe True (fmap definedType ret)) + then [Error UndefinedType ("undefined return type in \"" ++ ident ++ "\"") pos] + else [] + +compile :: A.Expr -> State CompState CompResult +compile x = do + case x of + (A.Module _ _) -> return $ Right Nothing + (A.Num _ _) -> return $ Right $ Just $ A.Type "u8" -- TODO: placeholder + (A.Bool' _ _) -> return $ Right $ Just $ A.Type "bool" + (A.BinOp A.Assign pos a@(A.Variable _ _) b) -> + typecheckBinOp a b pos + (A.BinOp A.Assign pos _ _) -> + addError $ Error InvalidTarget "invalid assignment target" pos + (A.BinOp _ pos a b) -> + -- TODO: types and invalid operators + typecheckBinOp a b pos + (A.Func ident params ret body priv anon pos) -> do + -- current env + (ev, errs) <- get + -- check for undefined types + (ev, errs) <- return $ (ev, (verifyFuncType ident params ret pos) ++ errs) + -- updated with the function + (ev, errs) <- + return $ case addSymUniq ev (ident, ftype, priv, pos) of + Left err -> (ev, err : errs) + Right ev -> (ev, errs) + -- lambdas can only access local variables (closures aren't supported) + fev <- return $ if anon then emptyEnv else ev + -- with parameters + (nev, errs) <- return $ foldlEither addSymUniq (addEnv fev, errs) params + -- helper for return + nev <- return $ addSym nev ("$fn$", ftype, True, pos) + put (nev, errs) + _ <- compileAll body + (_, errs) <- get + -- store updated errors and the env with the function + put (ev, errs) + return $ Right $ Just ftype + where + ftype = A.toFuncType params ret + (A.Call ident args pos) -> do + r <- compile ident + case r of + Right (Just (A.FuncType params rtyp)) -> do + r <- typecheckCall args params + case r of + Just err -> addError $ Error TypeError err pos + Nothing -> return $ Right rtyp + Right _ -> addError $ Error NonCallable "non callable value in function call" pos + _ -> pure $ Right Nothing + (A.Var ident typ val priv pos) -> do + (ev, errs) <- get + (ev, errs) <- return $ foldlEither addSymUniq (ev, errs) [(ident, typ, priv, pos)] + errs <- + return $ + if not (definedType typ) + then Error UndefinedType ("undefined type in variable declaration \"" ++ ident ++ "\"") pos : errs + else errs + put (ev, errs) + vt <- typecheckVal val $ Just typ + case vt of + Just err -> addError $ Error TypeError err pos + Nothing -> return $ Right $ Just typ + (A.Return value pos) -> do + (ev, _) <- get + case getSyml ev "$fn$" of + Just (_, A.FuncType _ rtyp, _, _) -> do + r <- typecheckReturn value rtyp + case r of + Just err -> addError $ Error TypeError err pos + Nothing -> return $ Right rtyp + _ -> addError $ Error UnexpectedReturn "return without function call" pos + (A.Variable ident pos) -> do + (ev, _) <- get + case getSym ev ident of + Just (_, t, _, _) -> return $ Right $ Just t + Nothing -> addError $ Error Undefined ("undefined variable \"" ++ ident ++ "\"") pos + +compileAll :: [A.Expr] -> State CompState CompResult +compileAll (x : xs) = do + _ <- compile x + compileAll xs +compileAll [] = do + (_, errs) <- get + case errs of + [] -> pure $ Right Nothing + _ -> return $ Left errs diff --git a/src/Micro/Env.hs b/src/Micro/Env.hs new file mode 100644 index 0000000..4174158 --- /dev/null +++ b/src/Micro/Env.hs @@ -0,0 +1,63 @@ +module Micro.Env where + +import qualified Data.Map as Map +import Data.Maybe (isJust) +import qualified Micro.Ast as A +import Micro.Error +import Text.Parsec (SourcePos) + +type Sym = (A.Ident, A.Type, Bool, SourcePos) + +type SymMap = Map.Map A.Ident Sym + +data Env = Env SymMap (Maybe Env) deriving (Show) + +emptyEnv :: Env +emptyEnv = Env Map.empty Nothing + +-- | @getSymB local env ident@ checks @local@ parameter to tell if we look on +-- the local environment or if we should check also in the parent(s). +getSymB :: Bool -> Env -> A.Ident -> Maybe Sym +getSymB local (Env m parent) id = + case (local, Map.lookup id m) of + (False, Nothing) -> do + p <- parent + getSym p id + (_, s) -> s + +-- | Gets a symbol checking all the environments. +getSym :: Env -> A.Ident -> Maybe Sym +getSym = getSymB False + +-- | Gets a symbol checking the local environment. +getSyml :: Env -> A.Ident -> Maybe Sym +getSyml = getSymB True + +-- | Checks if a symbol exists. +existsSym :: Env -> A.Ident -> Bool +existsSym env sym = isJust $ getSym env sym + +-- | Checks if a local symbol exists in the local environment. +existsSyml :: Env -> A.Ident -> Bool +existsSyml env sym = isJust $ getSyml env sym + +-- | @addSym e s@ add symbol @s@ to enviroment @e@ and returns the modified +-- environment. It will create a new enviroment if the symbol already exists +-- (shadowing). +addSym :: Env -> Sym -> Env +addSym env@(Env m parent) sym@(id, _, _, _) = case getSym env id of + Nothing -> Env (Map.insert id sym m) parent + Just _ -> Env (Map.singleton id sym) $ Just env + +-- | @addEnv e@ adds a new local environment using @e@ as parent. +addEnv :: Env -> Env +addEnv env = Env Map.empty $ Just env + +-- | @addSymUniq e s@ adds a local symbol @s@ to the enviroment @e@ if it +-- doesn't exist. +addSymUniq :: Env -> Sym -> Either Error Env +addSymUniq ev (id, typ, priv, pos) = case getSyml ev id of + Nothing -> Right $ addSym ev sym + Just (_, _, _, p) -> Left $ Error AlreadyDefined ("symbol \"" ++ id ++ "\" already defined in " ++ show p) pos + where + sym = (id, typ, priv, pos) diff --git a/src/Micro/Error.hs b/src/Micro/Error.hs new file mode 100644 index 0000000..ced1318 --- /dev/null +++ b/src/Micro/Error.hs @@ -0,0 +1,39 @@ +module Micro.Error where + +import Data.List (sort) +import Text.Parsec (SourcePos, errorPos) +import Text.Parsec.Error (ParseError, errorMessages, showErrorMessages) + +data ErrorType = GenericError | TypeError | UnexpectedReturn | AlreadyDefined | NonCallable | Undefined | UndefinedType | InvalidTarget deriving (Show) + +instance Enum ErrorType where + fromEnum GenericError = 0 + fromEnum TypeError = 1 + fromEnum UnexpectedReturn = 2 + fromEnum AlreadyDefined = 3 + fromEnum NonCallable = 4 + fromEnum Undefined = 5 + fromEnum UndefinedType = 6 + fromEnum InvalidTarget = 7 + toEnum _ = error "toEnum is undefined for Error" + +data Error = Error ErrorType String SourcePos + deriving (Eq) + +instance Show Error where + show (Error _ message pos) = + show pos ++ " error: " ++ message + +showParserError :: ParseError -> String +showParserError err = + show (errorPos err) ++ " error: syntax error" + ++ showErrorMessages "or" "unknown parser error" " expected:" " found:" "end of input" (errorMessages err) + +instance Ord Error where + compare (Error _ _ pos1) (Error _ _ pos2) = compare pos1 pos2 + +instance Eq ErrorType where + e1 == e2 = fromEnum e1 == fromEnum e2 + +showErrorList :: [Error] -> String +showErrorList errs = unlines $ map show (sort errs) diff --git a/src/Micro/Lexer.hs b/src/Micro/Lexer.hs new file mode 100644 index 0000000..5496af1 --- /dev/null +++ b/src/Micro/Lexer.hs @@ -0,0 +1,58 @@ +module Micro.Lexer where + +import Data.Char (digitToInt) +import Text.Parsec +import Text.Parsec.Language (emptyDef) +import Text.Parsec.String (Parser) +import qualified Text.Parsec.Token as T + +scanner :: T.TokenParser () +scanner = T.makeTokenParser style + where + ops = ["+", "*", "-", ";", "="] + names = ["module", "private", "var", "def", "return", "->", "true", "false"] + style = + emptyDef + { T.commentLine = "#", + T.reservedOpNames = ops, + T.reservedNames = names + } + +binNum :: Parser Integer +binNum = do + _ <- char '0' + _ <- oneOf "bB" + digits <- many1 $ oneOf "01" + let n = foldl (\x d -> 2 * x + toInteger (digitToInt d)) 0 digits + seq n $ return n + +integer :: Parser Integer +integer = try binNum <|> T.integer scanner + +parens :: Parser a -> Parser a +parens = T.parens scanner + +braces :: Parser a -> Parser a +braces = T.braces scanner + +commaSep :: Parser a -> Parser [a] +commaSep = T.commaSep scanner + +colonSep :: Parser String +colonSep = T.colon scanner + +identifier :: Parser String +identifier = T.identifier scanner + +reserved :: String -> Parser () +reserved = T.reserved scanner + +reservedOp :: String -> Parser () +reservedOp = T.reservedOp scanner + +scan :: Parser a -> Parser a +scan p = do + T.whiteSpace scanner + r <- p + eof + return r diff --git a/src/Micro/Parser.hs b/src/Micro/Parser.hs new file mode 100644 index 0000000..ea4873e --- /dev/null +++ b/src/Micro/Parser.hs @@ -0,0 +1,223 @@ +module Micro.Parser where + +import Control.Monad.Identity (Identity) +import Data.Maybe (isJust) +import Micro.Ast +import Micro.Lexer +import Text.Parsec +import qualified Text.Parsec.Expr as E +import Text.Parsec.String (Parser) + +binary :: String -> Op -> E.Assoc -> E.Operator String () Identity Expr +binary s f assoc = + E.Infix + ( reservedOp s + >> do + pos <- getPosition + return $ BinOp f pos + ) + assoc + +opTable :: [[E.Operator String () Identity Expr]] +opTable = + [ [binary "=" Assign E.AssocLeft], + [ binary "*" Mul E.AssocLeft, + binary "/" Div E.AssocLeft + ], + [ binary "+" Plus E.AssocLeft, + binary "-" Minus E.AssocLeft + ] + ] + +expr :: Parser Expr +expr = E.buildExpressionParser opTable factor + +number :: Parser Expr +number = do + pos <- getPosition + n <- integer + return $ Num n pos + +true :: Parser Expr +true = do + pos <- getPosition + reserved "true" + return $ Bool' True pos + +false :: Parser Expr +false = do + pos <- getPosition + reserved "false" + return $ Bool' False pos + +variable :: Parser Expr +variable = do + pos <- getPosition + var <- identifier + return $ Variable var pos + +typ :: Parser Type +typ = do + p <- identifier + return $ Type p + +typFn :: Parser Type +typFn = do + p <- parens $ commaSep typ + r <- optionMaybe $ do + reserved "->" + typ + return $ FuncType p r + +type' :: Parser Type +type' = do + try typFn + <|> typ "type" + +-- argument +arg :: Parser (String, Type, Bool, SourcePos) +arg = do + pos <- getPosition + i <- identifier + _ <- colonSep "\":\" before type" + t <- type' "type" + return $ (i, t, True, pos) + +-- function definition (common to def and lambda) +fdef :: Ident -> Bool -> Bool -> SourcePos -> Parser Expr +fdef ident priv anon pos = do + args <- parens $ commaSep arg + rtyp <- + optionMaybe + ( do + _ <- colonSep "\":\" before type" + rtyp <- type' "return type" + return $ rtyp + ) + body <- + braces $ + many $ + do + x <- fStatement + pure $ [x] + <|> grVar True + return $ Func ident args rtyp (concat $ body) priv anon pos + +function :: Bool -> Parser Expr +function priv = do + pos <- getPosition + reserved "def" + ident <- identifier + fdef ident priv False pos + +-- ident: type = value +varWithValue :: Bool -> Parser Expr +varWithValue priv = do + (ident, typ, _, pos) <- arg + reservedOp "=" "assignation" + value <- expr + return $ Var ident typ value priv pos + +-- group variable declaration +grVar :: Bool -> Parser [Expr] +grVar priv = do + reserved "var" + xs <- parens $ commaSep $ varWithValue priv + reservedOp ";" + return $ xs + +-- variable declaration +var :: Bool -> Parser Expr +var priv = do + reserved "var" + x <- varWithValue priv + reservedOp ";" + return $ x + +-- private definition +privateDf :: (Bool -> Parser Expr) -> Parser Expr +privateDf f = do + priv <- optionMaybe $ reserved "private" + f (isJust priv) + +-- private group definition +privateDfn :: (Bool -> Parser [Expr]) -> Parser [Expr] +privateDfn f = do + priv <- optionMaybe $ reserved "private" + f (isJust priv) + +lambdaId :: SourcePos -> Ident +lambdaId s = + "lambda" ++ "@" ++ show (sourceLine s) ++ "," ++ show (sourceColumn s) + +lambda :: Parser Expr +lambda = do + pos <- getPosition + fdef (lambdaId pos) True True pos + +return' :: Parser Expr +return' = do + pos <- getPosition + reserved "return" + value <- optionMaybe expr + reservedOp ";" + return $ Return value pos + +call :: Parser Expr +call = do + pos <- getPosition + ident <- try lambda <|> variable + args <- parens $ commaSep expr + return $ Call ident args pos + +factor :: Parser Expr +factor = + number + <|> true + <|> false + <|> try call + <|> try lambda + <|> try variable + <|> parens expr + +exprStmt :: Parser Expr +exprStmt = do + e <- expr <|> factor + reservedOp ";" + return $ e + +-- statements that appear in functions +fStatement :: Parser Expr +fStatement = try exprStmt <|> var True <|> return' + +-- top level statement +statement :: Parser Expr +statement = + try exprStmt <|> try (privateDf var) + <|> return' -- this will raise an error + +module' :: Parser Expr +module' = do + pos <- getPosition + reserved "module" + ident <- identifier + return $ Module ident pos + +program :: Parser [Expr] +program = do + m <- module' + n <- + many $ + do + x <- try (privateDf function) <|> statement + pure $ [x] + <|> privateDfn grVar + return $ [m] ++ (concat $ n) + +parse :: Parser [Expr] +parse = program + +parseFromFile :: Parsec String () a -> FilePath -> IO (Either ParseError a) +parseFromFile p fname = do + input <- readFile fname + return (runParser p () fname input) -- cgit v1.2.3