diff options
author | Iván Ávalos <avalos@disroot.org> | 2022-11-25 12:11:08 -0600 |
---|---|---|
committer | Iván Ávalos <avalos@disroot.org> | 2022-11-25 12:11:08 -0600 |
commit | eb4a3019bc0251e5b2b8229679e3c65d61d55336 (patch) | |
tree | f6b2a89dd35374272bd671933bfe87da4a587215 | |
parent | 6b4e9a4e95eb511c194200e38ee323091dc5d7d2 (diff) | |
download | javanol-eb4a3019bc0251e5b2b8229679e3c65d61d55336.tar.gz javanol-eb4a3019bc0251e5b2b8229679e3c65d61d55336.tar.bz2 javanol-eb4a3019bc0251e5b2b8229679e3c65d61d55336.zip |
Buen progreso, pero se cicla
-rw-r--r-- | compilador/ast/type.py | 11 | ||||
-rw-r--r-- | compilador/astree/decl.py (renamed from compilador/ast/decl.py) | 11 | ||||
-rw-r--r-- | compilador/astree/expr.py (renamed from compilador/ast/expr.py) | 12 | ||||
-rw-r--r-- | compilador/astree/ident.py (renamed from compilador/ast/ident.py) | 2 | ||||
-rw-r--r-- | compilador/astree/type.py | 29 | ||||
-rw-r--r-- | compilador/astree/unit.py | 9 | ||||
-rw-r--r-- | compilador/errors.py | 16 | ||||
-rw-r--r-- | compilador/lexer.py | 10 | ||||
-rw-r--r-- | compilador/parse/base.py | 60 | ||||
-rw-r--r-- | compilador/parse/decl.py | 102 | ||||
-rw-r--r-- | compilador/parse/expr.py | 12 | ||||
-rw-r--r-- | compilador/parse/ident.py | 15 | ||||
-rw-r--r-- | compilador/parse/type.py | 63 | ||||
-rw-r--r-- | compilador/parse/unit.py | 15 | ||||
-rw-r--r-- | compilador/parser.py | 54 | ||||
-rw-r--r-- | compilador/tabla.py | 5 |
16 files changed, 352 insertions, 74 deletions
diff --git a/compilador/ast/type.py b/compilador/ast/type.py deleted file mode 100644 index 62a56e9..0000000 --- a/compilador/ast/type.py +++ /dev/null @@ -1,11 +0,0 @@ -from enum import Enum - -# A built-in primitive type (int, bool, str, etc). -class BuiltinType(Enum): - BOOLEAN = 'booleano' - STRING = 'cadena' - CHAR = 'caracter' - INT = 'entero' - VOID = 'vacio' - -Type = BuiltinType diff --git a/compilador/ast/decl.py b/compilador/astree/decl.py index 1c12162..1e6de1d 100644 --- a/compilador/ast/decl.py +++ b/compilador/astree/decl.py @@ -1,8 +1,9 @@ +from dataclasses import dataclass from typing import Optional -from type import Type -from ident import Ident -from expr import Expr +from astree.type import Type +from astree.ident import Ident +from astree.expr import Expr # A global declaration. # @@ -11,11 +12,11 @@ from expr import Expr class DeclGlobal: ident: Ident _type: Type - init: Optional[Expr] + init: Expr # A function declaration. # -# vacio main() { ... } +# funcion vacio main() { ... } @dataclass class DeclFunc: ident: Ident diff --git a/compilador/ast/expr.py b/compilador/astree/expr.py index 646733f..1bb7f4f 100644 --- a/compilador/ast/expr.py +++ b/compilador/astree/expr.py @@ -2,7 +2,9 @@ from dataclasses import dataclass from enum import Enum, auto from typing import List, Optional -from type import Type +from astree.type import Type + +Expr = None # An assignment expression # @@ -43,7 +45,7 @@ class BinarithmExpr: # foo(bar) @dataclass class CallExpr: - lvalue: Expr, + lvalue: Expr args: List[Expr] # A compound expression. @@ -74,7 +76,7 @@ ConstantExpr = Value | NumberConstant @dataclass class ForExpr: bindings: Optional[Expr] - cond: Expr, + cond: Expr afterthought: Optional[Expr] body: Expr @@ -83,8 +85,8 @@ class ForExpr: # si (a) { } sino { } @dataclass class IfExpr: - cond: Expr, - tbranch: Expr, + cond: Expr + tbranch: Expr fbranch: Optional[Expr] # A print statement. diff --git a/compilador/ast/ident.py b/compilador/astree/ident.py index b6a3acb..936b745 100644 --- a/compilador/ast/ident.py +++ b/compilador/astree/ident.py @@ -1,3 +1,3 @@ from typing import List -Ident = List[str] +Ident = str diff --git a/compilador/astree/type.py b/compilador/astree/type.py new file mode 100644 index 0000000..5389702 --- /dev/null +++ b/compilador/astree/type.py @@ -0,0 +1,29 @@ +from dataclasses import dataclass +from typing import List +from enum import Enum + +from tabla import Token + +Type = None + +# A built-in primitive type (int, bool, str, etc). +class BuiltinType(Enum): + BOOLEAN = Token.BOOLEAN + STRING = Token.STRING + CHAR = Token.CHAR + INT = Token.INT + VOID = Token.VOID + +# A parameter to a function type. +@dataclass +class FuncParam: + name: str + _type: Type + +# funcion vacio ... (a: int, b: int ...) +@dataclass +class FuncType: + result: Type + params: List[FuncParam] + +Type = BuiltinType diff --git a/compilador/astree/unit.py b/compilador/astree/unit.py new file mode 100644 index 0000000..8ffdf19 --- /dev/null +++ b/compilador/astree/unit.py @@ -0,0 +1,9 @@ +from dataclasses import dataclass +from typing import List + +from astree.decl import Decl + +# A single compilation unit, representing all of the members of a namespace. +@dataclass +class Unit: + decls: List[Decl] diff --git a/compilador/errors.py b/compilador/errors.py index 87c925e..5a4e7a2 100644 --- a/compilador/errors.py +++ b/compilador/errors.py @@ -15,9 +15,17 @@ class Error: 'S_ESPERA_PC': 'Se esperaba `;`', } - def __init__(self, error, numlinea): - print("Error en línea %d: %s" % (numlinea, self.errors[error]), file=sys.stderr) + def __init__(self, msg: str = None): + self.message = msg - def __init__(self, got: Token, expects: List[Token], numlinea = int): + @classmethod + def lex(self, error, numlinea: int): + return Error("Error en línea %d: %s" % (numlinea, self.errors[error])) + + @classmethod + def syntax(self, got: Token, expects: List[Token], numlinea: int): + error = Error() strexp = ', '.join(['`%s\'' % e.value for e in expects]) - self.message = "Error en la línea %d, se encontró `%s', pero se esperaba %s" % (numlinea, got.value, strexp) + error.message = ("Error en la línea %d: se encontró `%s', pero se esperaba %s" % + (numlinea, got.value, strexp)) + return error diff --git a/compilador/lexer.py b/compilador/lexer.py index 2106c3f..feac970 100644 --- a/compilador/lexer.py +++ b/compilador/lexer.py @@ -22,7 +22,8 @@ reservadas = { 'leer': 'READ', 'retorna': 'RETURN', 'vacio': 'VOID', - 'mientras': 'WHILE' + 'mientras': 'WHILE', + 'funcion': 'FUNCTION' } class Selector(Enum): @@ -60,6 +61,9 @@ class Lexer: if r == Control.ERROR: return 1 self.numlinea += 1 + # End of file (EOF) + self.insertar_tabla(Token.EOF, None, None) + # Exportar tabla de símbolos self.tabla.exportar(self.input_file + '.tab') return 0 @@ -154,11 +158,11 @@ class Lexer: def procesar_caracter(self, c): if len(self.recol_caracter) > 1: - Error('L_CAR_LARGO', self.numlinea) + print(Error.lex('L_CAR_LARGO', self.numlinea).message) return Control.ERROR if c == '\'': if len(self.recol_caracter) == 0: - Error('L_CAR_VACIO', self.numlinea) + print(Error('L_CAR_VACIO', self.numlinea).message) return Control.ERROR self.insertar_tabla(Token.CHAR_LIT, None, self.recol_caracter) self.selector = Selector.NINGUNO diff --git a/compilador/parse/base.py b/compilador/parse/base.py new file mode 100644 index 0000000..a8f6f8a --- /dev/null +++ b/compilador/parse/base.py @@ -0,0 +1,60 @@ +from enum import Enum +from tabla import LexToken, TablaLex, tokens +from arbol import Arbol, Nodo +from shared import Control +from pprint import pprint +from errors import Error +from typing import NoReturn +from more_itertools import seekable + +from tabla import TablaLex, Token +from errors import Error + +class BaseParser: + def __init__(self, iterador: seekable): + self.iterador: seekable = iterador + + ''' Requires the next token to have a matching ltok. Returns that + token, or an error. ''' + def want(self, *want: Token) -> (LexToken | Error): + tok: LexToken = self.lex() + if len(want) == 0: + return tok + for w in want: + if tok.tipo == w: + return tok + + return Error.syntax(tok.tipo, want, tok.numlinea) + + ''' Looks for a matching ltok from the lexer, and if not present, + unlexes the token and returns void. If found, the token is + consumed from the lexer and is returned. ''' + def _try(self, *want: Token) -> (LexToken | NoReturn): + tok: LexToken = self.lex() + if len(want) == 0: + return tok + for w in want: + if tok.tipo == w: + return tok + self.unlex() + + ''' Looks for a matching ltok from the lexer, unlexes the token, + and returns it; or void if it was not an ltok. ''' + def peek(self, *want: Token) -> (LexToken | NoReturn): + tok: LexToken = self.iterador.peek() + if len(want) == 0: + return tok + for w in want: + if tok.tipo == w: + return tok + + def lex(self): + return next(self.iterador) + + def unlex(self): + self.iterador.seek(-1) + + ''' Returns a syntax error if cond is false and void otherwise ''' + def synassert(self, cond: bool, msg: str) -> (Error | NoReturn): + if not cond: + return Error(msg = msg) diff --git a/compilador/parse/decl.py b/compilador/parse/decl.py new file mode 100644 index 0000000..73f8580 --- /dev/null +++ b/compilador/parse/decl.py @@ -0,0 +1,102 @@ +from typing import List, cast, Optional +from more_itertools import peekable + +from tabla import Token, LexToken +from parse.base import BaseParser +from errors import Error +from parse.type import ParseType +from parse.ident import ParseIdent +from parse.expr import ParseExpr, Expr +from astree.decl import DeclGlobal, DeclFunc, Decl + +class ParseDecl: + def __init__(self, parser: BaseParser): + self.parser = parser + + def decl_global(self) -> (DeclGlobal | Error): + # Tipo + _type = ParseType(self.parser)._type() + if type(_type) is Error: + return _type + + # Identificador + ident = ParseIdent(self.parser).ident() + if type(ident) is Error: + return ident + + # = + init: Optional[Expr] = None + eq = self.parser._try(Token.EQUAL) + if eq: + # Expresión + init = ParseExpr(self.parser).expr() + if type(init) is Error: + return init + + return DeclGlobal(ident = ident, + _type = _type, + init = init) + + def decl_func(self) -> (DeclFunc | Error): + # funcion + tok = self.parser.want(Token.FUNCTION) + if type(tok) is Error: + return tok + + # Tipo + _type = ParseType(self.parser)._type() + if type(_type) is Error: + return _type + + # Identificador + ident = ParseIdent(self.parser).ident() + if type(ident) is Error: + return ident + + # Prototipo + proto = ParseType(self.parser).prototype() + if type(proto) is Error: + return proto + + # ; + # semicolon = self.parser.want(Token.SEMICOLON) + # if type(semicolon) is Error: + # return semicolon + # self.parser.unlex() + + return DeclFunc(ident = ident, + prototype = proto, + body = None) + + # Parses a declaration. + def decl(self) -> (Decl | Error): + toks = [Token.BOOLEAN, Token.CHAR, Token.INT, Token.STRING, Token.VOID] + _next = self.parser.peek(*toks) + decl: Optional[Decl] = None + if not _next: + decl = self.decl_func() + else: + decl = self.decl_global() + + if type(decl) is Error: + return decl + + # ; + semicolon = self.parser.want(Token.SEMICOLON) + if type(semicolon) is Error: + return semicolon + + return decl + + # Parses the declarations for a sub-parser. + def decls(self) -> (List[Decl] | Error): + decls: List[Decl] = [] + while not self.parser.peek(Token.EOF): + # print(self.parser.peek()) + # print(next(self.parser.iterador)) + decl = self.decl() + if type(decl) is Error: + return decl + decls.append(decl) + + return decls diff --git a/compilador/parse/expr.py b/compilador/parse/expr.py new file mode 100644 index 0000000..a00f3d5 --- /dev/null +++ b/compilador/parse/expr.py @@ -0,0 +1,12 @@ +from parse.base import BaseParser +from errors import Error +from astree.expr import Expr + +class ParseExpr: + def __init__(self, parser: BaseParser): + self.parser = parser + + def expr(self) -> Expr | Error: + next(self.parser.iterador) + return + diff --git a/compilador/parse/ident.py b/compilador/parse/ident.py new file mode 100644 index 0000000..5887fa2 --- /dev/null +++ b/compilador/parse/ident.py @@ -0,0 +1,15 @@ + +from tabla import Token, LexToken +from parse.base import BaseParser +from astree.ident import Ident +from errors import Error + +class ParseIdent: + def __init__(self, parser: BaseParser): + self.parser = parser + + def ident(self) -> (Ident | Error): + tok: LexToken = self.parser.want(Token.IDENT) + if type(tok) is Error: + return tok + return tok.nombre diff --git a/compilador/parse/type.py b/compilador/parse/type.py new file mode 100644 index 0000000..27b83d3 --- /dev/null +++ b/compilador/parse/type.py @@ -0,0 +1,63 @@ +from typing import List + +from parse.base import BaseParser +from lexer import LexToken, Token +from astree.type import Type, BuiltinType, FuncType, FuncParam +from errors import Error + +class ParseType: + def __init__(self, parser: BaseParser): + self.parser = parser + + def _type(self) -> (Type | Error): + types = [Token.BOOLEAN, Token.CHAR, Token.INT, Token.STRING, Token.VOID] + tok = self.parser.want(*types) + if type(tok) is Error: + return tok + return BuiltinType(tok.tipo) + + def prototype(self) -> (FuncType | Error): + params: List[FuncParam] = [] + + # Tipo + tok = ParseType(self.parser)._type() + if type(tok) is Error: + return tok + _type = tok + + # ( + tok = self.parser.want(Token.L_PAREN) + if type(tok) is Error: + return tok + while True: + tok = self.parser._try(Token.R_PAREN) + if not tok: + break + + # Tipo + tok = ParseType(self.parser)._type() + if type(tok) is Error: + return tok + __type: Type = tok + + # Identificador + tok = self.parser.want(Token.IDENT) + if type(tok) is Error: + return tok + name: str = tok + + params.append(FuncParam(name = name, + _type = __type)) + + # , + tok = self.parser._try(Token.COMMA) + if not tok: + continue + + # ) + tok = self.parser.want(Token.R_PAREN) + if type(tok) is Error: + return tok + + return FuncType(result = _type, + params = params) diff --git a/compilador/parse/unit.py b/compilador/parse/unit.py new file mode 100644 index 0000000..954b8b4 --- /dev/null +++ b/compilador/parse/unit.py @@ -0,0 +1,15 @@ + +from errors import Error +from astree.unit import Unit +from parse.base import BaseParser +from parse.decl import ParseDecl + +class ParseUnit: + def __init__(self, parser: BaseParser): + self.parser: BaseParser = parser + + def unit(self) -> (Unit | Error): + decls = ParseDecl(self.parser).decls() + if type(decls) is Error: + return decls + return Unit(decls = decls) diff --git a/compilador/parser.py b/compilador/parser.py index b1fd411..82ee3c8 100644 --- a/compilador/parser.py +++ b/compilador/parser.py @@ -1,13 +1,10 @@ -from enum import Enum -from tabla import LexToken, TablaLex, tokens -from arbol import Arbol, Nodo -from shared import Control +import sys from pprint import pprint -from errors import Error -from typing import NoReturn -from tabla import TablaLex, Token +from tabla import TablaLex from errors import Error +from parse.base import BaseParser +from parse.unit import ParseUnit class Parser: def __init__(self, input_file: str): @@ -16,42 +13,11 @@ class Parser: self.iterador = self.tabla.iterar() def inicio(self): - tok = self.want(Token.STRING_LIT, Token.BOOLEAN_LIT) - if type(tok) == Error: - print(tok.message) + parser = BaseParser(self.iterador) + unit = ParseUnit(parser).unit() + if type(unit) is Error: + print (unit.message, file=sys.stderr) return 1 - return 0 - - ''' Requires the next token to have a matching ltok. Returns that - token, or an error. ''' - def want(self, *want: Token) -> (Token | Error): - tok: LexToken = next(self.iterador) - if len(want) == 0: - return tok - for w in want: - if tok.tipo == w: - return tok - return Error(got = tok.tipo, expects = want, numlinea = tok.numlinea) - ''' Looks for a matching ltok from the lexer, and if not present, - unlexes the token and returns void. If found, the token is - consumed from the lexer and is returned. ''' - def _try(self, *want: Token) -> (Token | NoReturn): - tok: LexToken = next(self.iterador) - if len(want) == 0: - return tok - for w in want: - if tok.tipo == w: - return tok - self.iterador.seek(-1) - - ''' Looks for a matching ltok from the lexer, unlexes the token, - and returns it; or void if it was not an ltok. ''' - def peek(self, *want: Token) -> (Token | NoReturn): - tok: LexToken = next(self.iterador) - self.iterador.seek(-1) - if len(want) == 0: - return tok - for w in want: - if tok.tipo == w: - return tok + pprint(self.unit) + return 0 diff --git a/compilador/tabla.py b/compilador/tabla.py index 62e1b9d..d0a8ffe 100644 --- a/compilador/tabla.py +++ b/compilador/tabla.py @@ -17,7 +17,8 @@ reservadas = [ 'RETURN', 'STRING', 'VOID', - 'WHILE' + 'WHILE', + 'FUNCTION' ] literales = [ @@ -41,6 +42,7 @@ class Token(Enum): RETURN = 'retorna' STRING = 'cadena' VOID = 'vacio' + FUNCTION = 'funcion' WHILE = 'mientras' BOOLEAN_LIT = 'BOOLEAN_LIT' INT_LIT = 'INT_LIT' @@ -67,6 +69,7 @@ class Token(Enum): OR = '||' EQEQ = '==' NOTEQ = '!=' + EOF = 'EOF' tokens = reservadas + literales + [ '{', '}', '(', ')', ',', '\'', |