aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Iván Ávalos <avalos@disroot.org> 2022-11-25 12:11:08 -0600
committer Iván Ávalos <avalos@disroot.org> 2022-11-25 12:11:08 -0600
commit eb4a3019bc0251e5b2b8229679e3c65d61d55336 (patch)
tree f6b2a89dd35374272bd671933bfe87da4a587215
parent 6b4e9a4e95eb511c194200e38ee323091dc5d7d2 (diff)
download javanol-eb4a3019bc0251e5b2b8229679e3c65d61d55336.tar.gz
javanol-eb4a3019bc0251e5b2b8229679e3c65d61d55336.tar.bz2
javanol-eb4a3019bc0251e5b2b8229679e3c65d61d55336.zip
Buen progreso, pero se cicla
-rw-r--r--compilador/ast/type.py11
-rw-r--r--compilador/astree/decl.py (renamed from compilador/ast/decl.py)11
-rw-r--r--compilador/astree/expr.py (renamed from compilador/ast/expr.py)12
-rw-r--r--compilador/astree/ident.py (renamed from compilador/ast/ident.py)2
-rw-r--r--compilador/astree/type.py29
-rw-r--r--compilador/astree/unit.py9
-rw-r--r--compilador/errors.py16
-rw-r--r--compilador/lexer.py10
-rw-r--r--compilador/parse/base.py60
-rw-r--r--compilador/parse/decl.py102
-rw-r--r--compilador/parse/expr.py12
-rw-r--r--compilador/parse/ident.py15
-rw-r--r--compilador/parse/type.py63
-rw-r--r--compilador/parse/unit.py15
-rw-r--r--compilador/parser.py54
-rw-r--r--compilador/tabla.py5
16 files changed, 352 insertions, 74 deletions
diff --git a/compilador/ast/type.py b/compilador/ast/type.py
deleted file mode 100644
index 62a56e9..0000000
--- a/compilador/ast/type.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from enum import Enum
-
-# A built-in primitive type (int, bool, str, etc).
-class BuiltinType(Enum):
- BOOLEAN = 'booleano'
- STRING = 'cadena'
- CHAR = 'caracter'
- INT = 'entero'
- VOID = 'vacio'
-
-Type = BuiltinType
diff --git a/compilador/ast/decl.py b/compilador/astree/decl.py
index 1c12162..1e6de1d 100644
--- a/compilador/ast/decl.py
+++ b/compilador/astree/decl.py
@@ -1,8 +1,9 @@
+from dataclasses import dataclass
from typing import Optional
-from type import Type
-from ident import Ident
-from expr import Expr
+from astree.type import Type
+from astree.ident import Ident
+from astree.expr import Expr
# A global declaration.
#
@@ -11,11 +12,11 @@ from expr import Expr
class DeclGlobal:
ident: Ident
_type: Type
- init: Optional[Expr]
+ init: Expr
# A function declaration.
#
-# vacio main() { ... }
+# funcion vacio main() { ... }
@dataclass
class DeclFunc:
ident: Ident
diff --git a/compilador/ast/expr.py b/compilador/astree/expr.py
index 646733f..1bb7f4f 100644
--- a/compilador/ast/expr.py
+++ b/compilador/astree/expr.py
@@ -2,7 +2,9 @@ from dataclasses import dataclass
from enum import Enum, auto
from typing import List, Optional
-from type import Type
+from astree.type import Type
+
+Expr = None
# An assignment expression
#
@@ -43,7 +45,7 @@ class BinarithmExpr:
# foo(bar)
@dataclass
class CallExpr:
- lvalue: Expr,
+ # The expression being called, followed by the argument expressions.
+ lvalue: Expr
args: List[Expr]
# A compound expression.
@@ -74,7 +76,7 @@ ConstantExpr = Value | NumberConstant
@dataclass
class ForExpr:
+ # Loop parts: optional bindings, the condition, an optional afterthought
+ # and the body.
bindings: Optional[Expr]
- cond: Expr,
+ cond: Expr
afterthought: Optional[Expr]
body: Expr
@@ -83,8 +85,8 @@ class ForExpr:
# si (a) { } sino { }
@dataclass
class IfExpr:
+ # The condition, the branch for a true condition and the optional branch
+ # for a false one (the `sino` part).
- cond: Expr,
- tbranch: Expr,
+ cond: Expr
+ tbranch: Expr
fbranch: Optional[Expr]
# A print statement.
diff --git a/compilador/ast/ident.py b/compilador/astree/ident.py
index b6a3acb..936b745 100644
--- a/compilador/ast/ident.py
+++ b/compilador/astree/ident.py
@@ -1,3 +1,3 @@
from typing import List
-Ident = List[str]
+Ident = str
diff --git a/compilador/astree/type.py b/compilador/astree/type.py
new file mode 100644
index 0000000..5389702
--- /dev/null
+++ b/compilador/astree/type.py
@@ -0,0 +1,29 @@
+from dataclasses import dataclass
+from typing import List
+from enum import Enum
+
+from tabla import Token
+
+# A built-in primitive type (int, bool, str, etc). The value of each member
+# is the lexer Token of the matching type keyword, so BuiltinType(token)
+# converts a keyword token into its type.
+class BuiltinType(Enum):
+    BOOLEAN = Token.BOOLEAN
+    STRING = Token.STRING
+    CHAR = Token.CHAR
+    INT = Token.INT
+    VOID = Token.VOID
+
+# Any type the AST can describe. It is bound here, before the dataclasses
+# below, so that their field annotations refer to the real type rather than
+# to a None placeholder.
+Type = BuiltinType
+
+# A parameter to a function type.
+@dataclass
+class FuncParam:
+    # Name of the parameter.
+    name: str
+    # Declared type of the parameter.
+    _type: Type
+
+# A function type: its result type and its parameters.
+#
+# funcion vacio ... (a: int, b: int ...)
+@dataclass
+class FuncType:
+    result: Type
+    params: List[FuncParam]
diff --git a/compilador/astree/unit.py b/compilador/astree/unit.py
new file mode 100644
index 0000000..8ffdf19
--- /dev/null
+++ b/compilador/astree/unit.py
@@ -0,0 +1,9 @@
+from dataclasses import dataclass
+from typing import List
+
+from astree.decl import Decl
+
+# A single compilation unit, representing all of the members of a namespace.
+@dataclass
+class Unit:
+ # The declarations that belong to this unit.
+ decls: List[Decl]
diff --git a/compilador/errors.py b/compilador/errors.py
index 87c925e..5a4e7a2 100644
--- a/compilador/errors.py
+++ b/compilador/errors.py
@@ -15,9 +15,17 @@ class Error:
'S_ESPERA_PC': 'Se esperaba `;`',
}
- def __init__(self, error, numlinea):
- print("Error en línea %d: %s" % (numlinea, self.errors[error]), file=sys.stderr)
+ # Stores `msg` as the text of the error; it defaults to None when omitted.
+ def __init__(self, msg: str = None):
+ self.message = msg
- def __init__(self, got: Token, expects: List[Token], numlinea = int):
+    @classmethod
+    def lex(cls, error, numlinea: int):
+        """Build the error for the lexical failure `error` on line `numlinea`.
+
+        `error` is a key of the `errors` table; its text is embedded in the
+        message of the returned instance.
+        """
+        return cls("Error en línea %d: %s" % (numlinea, cls.errors[error]))
+
+ # Builds the error reported when the token `got` was found on line
+ # `numlinea` although one of the tokens in `expects` was required. The
+ # message lists the value of every expected token.
+ @classmethod
+ def syntax(self, got: Token, expects: List[Token], numlinea: int):
+ error = Error()
strexp = ', '.join(['`%s\'' % e.value for e in expects])
- self.message = "Error en la línea %d, se encontró `%s', pero se esperaba %s" % (numlinea, got.value, strexp)
+ error.message = ("Error en la línea %d: se encontró `%s', pero se esperaba %s" %
+ (numlinea, got.value, strexp))
+ return error
diff --git a/compilador/lexer.py b/compilador/lexer.py
index 2106c3f..feac970 100644
--- a/compilador/lexer.py
+++ b/compilador/lexer.py
@@ -22,7 +22,8 @@ reservadas = {
'leer': 'READ',
'retorna': 'RETURN',
'vacio': 'VOID',
- 'mientras': 'WHILE'
+ 'mientras': 'WHILE',
+ 'funcion': 'FUNCTION'
}
class Selector(Enum):
@@ -60,6 +61,9 @@ class Lexer:
if r == Control.ERROR: return 1
self.numlinea += 1
+ # End of file (EOF)
+ self.insertar_tabla(Token.EOF, None, None)
+
# Exportar tabla de símbolos
self.tabla.exportar(self.input_file + '.tab')
return 0
@@ -154,11 +158,11 @@ class Lexer:
def procesar_caracter(self, c):
if len(self.recol_caracter) > 1:
- Error('L_CAR_LARGO', self.numlinea)
+ print(Error.lex('L_CAR_LARGO', self.numlinea).message)
return Control.ERROR
if c == '\'':
if len(self.recol_caracter) == 0:
- Error('L_CAR_VACIO', self.numlinea)
+ print(Error('L_CAR_VACIO', self.numlinea).message)
return Control.ERROR
self.insertar_tabla(Token.CHAR_LIT, None, self.recol_caracter)
self.selector = Selector.NINGUNO
diff --git a/compilador/parse/base.py b/compilador/parse/base.py
new file mode 100644
index 0000000..a8f6f8a
--- /dev/null
+++ b/compilador/parse/base.py
@@ -0,0 +1,60 @@
+from enum import Enum
+from tabla import LexToken, TablaLex, tokens
+from arbol import Arbol, Nodo
+from shared import Control
+from pprint import pprint
+from errors import Error
+from typing import NoReturn
+from more_itertools import seekable
+
+from tabla import TablaLex, Token
+from errors import Error
+
+class BaseParser:
+    """Token-level helpers shared by the sub-parsers.
+
+    Wraps a `more_itertools.seekable` stream of lexer tokens and offers
+    look-ahead (`peek`), conditional consumption (`_try`), mandatory
+    consumption (`want`) and one-token push-back (`unlex`).
+    """
+
+    def __init__(self, iterador: seekable):
+        self.iterador: seekable = iterador
+        # Number of tokens handed out by lex(). seekable.seek() takes an
+        # absolute index, so stepping back requires knowing the position.
+        # NOTE(review): assumes `iterador` has not been advanced yet.
+        self._pos: int = 0
+
+    def want(self, *want: Token) -> (LexToken | Error):
+        """Consume the next token and require it to be one of `want`.
+
+        Returns the token when its type matches (or when `want` is empty);
+        otherwise returns a syntax Error naming what was found and what was
+        expected. The token is consumed in both cases.
+        """
+        tok: LexToken = self.lex()
+        if not want or tok.tipo in want:
+            return tok
+
+        return Error.syntax(tok.tipo, want, tok.numlinea)
+
+    def _try(self, *want: Token) -> (LexToken | None):
+        """Consume and return the next token only if it is one of `want`.
+
+        With no arguments any token is accepted. When the next token does
+        not match, nothing is consumed and None is returned.
+        """
+        # Decide on a look-ahead so a mismatch never needs to be pushed back.
+        tok: LexToken = self.iterador.peek()
+        if want and tok.tipo not in want:
+            return None
+        return self.lex()
+
+    def peek(self, *want: Token) -> (LexToken | None):
+        """Return the next token without consuming it.
+
+        The token is returned when its type is one of `want` (or when `want`
+        is empty); otherwise None is returned.
+        """
+        tok: LexToken = self.iterador.peek()
+        if not want or tok.tipo in want:
+            return tok
+        return None
+
+    def lex(self) -> LexToken:
+        """Consume and return the next token of the stream."""
+        tok = next(self.iterador)
+        self._pos += 1
+        return tok
+
+    def unlex(self) -> None:
+        """Push back the token most recently returned by `lex`.
+
+        Only tokens obtained through `lex` (directly, or via `want`/`_try`)
+        are tracked.
+        """
+        # seek(-1) does not step back one token: the index is absolute, so
+        # the stream would yield the last cached token and then replay the
+        # cache from its first element, making the parser loop forever.
+        self._pos = max(self._pos - 1, 0)
+        self.iterador.seek(self._pos)
+
+    def synassert(self, cond: bool, msg: str) -> (Error | None):
+        """Return a syntax Error carrying `msg` if `cond` is false, else None."""
+        if not cond:
+            return Error(msg = msg)
+        return None
diff --git a/compilador/parse/decl.py b/compilador/parse/decl.py
new file mode 100644
index 0000000..73f8580
--- /dev/null
+++ b/compilador/parse/decl.py
@@ -0,0 +1,102 @@
+from typing import List, cast, Optional
+from more_itertools import peekable
+
+from tabla import Token, LexToken
+from parse.base import BaseParser
+from errors import Error
+from parse.type import ParseType
+from parse.ident import ParseIdent
+from parse.expr import ParseExpr, Expr
+from astree.decl import DeclGlobal, DeclFunc, Decl
+
+class ParseDecl:
+    """Parser for top-level declarations: globals and functions.
+
+    Every method returns either the parsed node or the first Error produced
+    by one of its steps.
+    """
+
+    def __init__(self, parser: BaseParser):
+        self.parser = parser
+
+    def decl_global(self) -> (DeclGlobal | Error):
+        """Parse a type, an identifier and an optional `= <expr>` initializer."""
+        # Type
+        parsed_type = ParseType(self.parser)._type()
+        if type(parsed_type) is Error:
+            return parsed_type
+
+        # Identifier
+        name = ParseIdent(self.parser).ident()
+        if type(name) is Error:
+            return name
+
+        # Optional initializer: `=` followed by an expression
+        value: Optional[Expr] = None
+        if self.parser._try(Token.EQUAL):
+            value = ParseExpr(self.parser).expr()
+            if type(value) is Error:
+                return value
+
+        return DeclGlobal(ident = name, _type = parsed_type, init = value)
+
+    def decl_func(self) -> (DeclFunc | Error):
+        """Parse `funcion`, a type, an identifier and a prototype.
+
+        The body is not parsed: the resulting DeclFunc always has
+        `body = None`. The terminating `;` is consumed by `decl`.
+        """
+        # funcion
+        keyword = self.parser.want(Token.FUNCTION)
+        if type(keyword) is Error:
+            return keyword
+
+        # Type (consumed here; it is not stored in the DeclFunc)
+        result_type = ParseType(self.parser)._type()
+        if type(result_type) is Error:
+            return result_type
+
+        # Identifier
+        name = ParseIdent(self.parser).ident()
+        if type(name) is Error:
+            return name
+
+        # Prototype
+        signature = ParseType(self.parser).prototype()
+        if type(signature) is Error:
+            return signature
+
+        return DeclFunc(ident = name, prototype = signature, body = None)
+
+    def decl(self) -> (Decl | Error):
+        """Parse one declaration together with its terminating `;`.
+
+        A leading type keyword selects a global declaration; anything else
+        is parsed as a function declaration.
+        """
+        type_keywords = (Token.BOOLEAN, Token.CHAR, Token.INT,
+                         Token.STRING, Token.VOID)
+        if self.parser.peek(*type_keywords):
+            parsed = self.decl_global()
+        else:
+            parsed = self.decl_func()
+
+        if type(parsed) is Error:
+            return parsed
+
+        # ;
+        terminator = self.parser.want(Token.SEMICOLON)
+        if type(terminator) is Error:
+            return terminator
+
+        return parsed
+
+    def decls(self) -> (List[Decl] | Error):
+        """Parse declarations until the EOF token is the next token."""
+        parsed: List[Decl] = []
+        while True:
+            if self.parser.peek(Token.EOF):
+                return parsed
+            item = self.decl()
+            if type(item) is Error:
+                return item
+            parsed.append(item)
diff --git a/compilador/parse/expr.py b/compilador/parse/expr.py
new file mode 100644
index 0000000..a00f3d5
--- /dev/null
+++ b/compilador/parse/expr.py
@@ -0,0 +1,12 @@
+from parse.base import BaseParser
+from errors import Error
+from astree.expr import Expr
+
+class ParseExpr:
+    """Parser for expressions. Placeholder: no expression grammar yet."""
+
+    def __init__(self, parser: BaseParser):
+        self.parser = parser
+
+    def expr(self) -> Expr | Error:
+        """Skip one token in place of parsing an expression.
+
+        Always returns None for now.
+        """
+        # Consume through the parser's own lex() instead of reaching into
+        # its iterator, so that token consumption stays in one place.
+        self.parser.lex()
+        return None
+
diff --git a/compilador/parse/ident.py b/compilador/parse/ident.py
new file mode 100644
index 0000000..5887fa2
--- /dev/null
+++ b/compilador/parse/ident.py
@@ -0,0 +1,15 @@
+
+from tabla import Token, LexToken
+from parse.base import BaseParser
+from astree.ident import Ident
+from errors import Error
+
+class ParseIdent:
+    """Parser for identifiers."""
+
+    def __init__(self, parser: BaseParser):
+        self.parser = parser
+
+    def ident(self) -> (Ident | Error):
+        """Consume an IDENT token and return its name.
+
+        If the next token is not an identifier, the Error produced by the
+        parser is returned unchanged.
+        """
+        result = self.parser.want(Token.IDENT)
+        return result if type(result) is Error else result.nombre
diff --git a/compilador/parse/type.py b/compilador/parse/type.py
new file mode 100644
index 0000000..27b83d3
--- /dev/null
+++ b/compilador/parse/type.py
@@ -0,0 +1,63 @@
+from typing import List
+
+from parse.base import BaseParser
+from lexer import LexToken, Token
+from astree.type import Type, BuiltinType, FuncType, FuncParam
+from errors import Error
+
+class ParseType:
+    """Parser for types and function prototypes.
+
+    Every method returns either the parsed node or the first Error produced
+    by one of its steps.
+    """
+
+    def __init__(self, parser: BaseParser):
+        self.parser = parser
+
+    def _type(self) -> (Type | Error):
+        """Consume a type keyword and return the matching BuiltinType."""
+        types = (Token.BOOLEAN, Token.CHAR, Token.INT, Token.STRING, Token.VOID)
+        tok = self.parser.want(*types)
+        if type(tok) is Error:
+            return tok
+        return BuiltinType(tok.tipo)
+
+    def prototype(self) -> (FuncType | Error):
+        """Parse a result type followed by a parenthesized parameter list.
+
+        Each parameter is a type followed by an identifier; parameters are
+        separated by commas and a trailing comma before `)` is accepted.
+        """
+        params: List[FuncParam] = []
+
+        # Result type
+        # NOTE(review): decl_func in parse/decl.py already consumes a type
+        # and an identifier before calling this method, so this is a second
+        # type in the token stream -- confirm the intended grammar.
+        result = self._type()
+        if type(result) is Error:
+            return result
+
+        # (
+        tok = self.parser.want(Token.L_PAREN)
+        if type(tok) is Error:
+            return tok
+
+        # Parameters, until the closing parenthesis is the next token. The
+        # `)` is only peeked here so that it is consumed exactly once below.
+        while not self.parser.peek(Token.R_PAREN):
+            # Type
+            param_type = self._type()
+            if type(param_type) is Error:
+                return param_type
+
+            # Identifier
+            tok = self.parser.want(Token.IDENT)
+            if type(tok) is Error:
+                return tok
+
+            # Store the identifier's name, not the token itself.
+            params.append(FuncParam(name = tok.nombre,
+                                    _type = param_type))
+
+            # , -- without a comma the parameter list is over
+            if not self.parser._try(Token.COMMA):
+                break
+
+        # )
+        tok = self.parser.want(Token.R_PAREN)
+        if type(tok) is Error:
+            return tok
+
+        return FuncType(result = result,
+                        params = params)
diff --git a/compilador/parse/unit.py b/compilador/parse/unit.py
new file mode 100644
index 0000000..954b8b4
--- /dev/null
+++ b/compilador/parse/unit.py
@@ -0,0 +1,15 @@
+
+from errors import Error
+from astree.unit import Unit
+from parse.base import BaseParser
+from parse.decl import ParseDecl
+
+class ParseUnit:
+    """Parser for a whole compilation unit."""
+
+    def __init__(self, parser: BaseParser):
+        self.parser: BaseParser = parser
+
+    def unit(self) -> (Unit | Error):
+        """Parse every declaration and wrap the list in a Unit.
+
+        If parsing the declarations fails, the Error is returned unchanged.
+        """
+        result = ParseDecl(self.parser).decls()
+        return result if type(result) is Error else Unit(decls = result)
diff --git a/compilador/parser.py b/compilador/parser.py
index b1fd411..82ee3c8 100644
--- a/compilador/parser.py
+++ b/compilador/parser.py
@@ -1,13 +1,10 @@
-from enum import Enum
-from tabla import LexToken, TablaLex, tokens
-from arbol import Arbol, Nodo
-from shared import Control
+import sys
from pprint import pprint
-from errors import Error
-from typing import NoReturn
-from tabla import TablaLex, Token
+from tabla import TablaLex
from errors import Error
+from parse.base import BaseParser
+from parse.unit import ParseUnit
class Parser:
def __init__(self, input_file: str):
@@ -16,42 +13,11 @@ class Parser:
self.iterador = self.tabla.iterar()
def inicio(self):
+ # Parses the token stream into a compilation unit. On failure the error
+ # message is printed to stderr and 1 is returned; otherwise the parsed
+ # unit is pretty-printed and 0 is returned.
- tok = self.want(Token.STRING_LIT, Token.BOOLEAN_LIT)
- if type(tok) == Error:
- print(tok.message)
+ parser = BaseParser(self.iterador)
+ unit = ParseUnit(parser).unit()
+ if type(unit) is Error:
+ print (unit.message, file=sys.stderr)
return 1
- return 0
-
- ''' Requires the next token to have a matching ltok. Returns that
- token, or an error. '''
- def want(self, *want: Token) -> (Token | Error):
- tok: LexToken = next(self.iterador)
- if len(want) == 0:
- return tok
- for w in want:
- if tok.tipo == w:
- return tok
- return Error(got = tok.tipo, expects = want, numlinea = tok.numlinea)
- ''' Looks for a matching ltok from the lexer, and if not present,
- unlexes the token and returns void. If found, the token is
- consumed from the lexer and is returned. '''
- def _try(self, *want: Token) -> (Token | NoReturn):
- tok: LexToken = next(self.iterador)
- if len(want) == 0:
- return tok
- for w in want:
- if tok.tipo == w:
- return tok
- self.iterador.seek(-1)
-
- ''' Looks for a matching ltok from the lexer, unlexes the token,
- and returns it; or void if it was not an ltok. '''
- def peek(self, *want: Token) -> (Token | NoReturn):
- tok: LexToken = next(self.iterador)
- self.iterador.seek(-1)
- if len(want) == 0:
- return tok
- for w in want:
- if tok.tipo == w:
- return tok
+ # Print the unit parsed above; it is a local, not an attribute of self.
+ pprint(unit)
+ return 0
diff --git a/compilador/tabla.py b/compilador/tabla.py
index 62e1b9d..d0a8ffe 100644
--- a/compilador/tabla.py
+++ b/compilador/tabla.py
@@ -17,7 +17,8 @@ reservadas = [
'RETURN',
'STRING',
'VOID',
- 'WHILE'
+ 'WHILE',
+ 'FUNCTION'
]
literales = [
@@ -41,6 +42,7 @@ class Token(Enum):
RETURN = 'retorna'
STRING = 'cadena'
VOID = 'vacio'
+ FUNCTION = 'funcion'
WHILE = 'mientras'
BOOLEAN_LIT = 'BOOLEAN_LIT'
INT_LIT = 'INT_LIT'
@@ -67,6 +69,7 @@ class Token(Enum):
OR = '||'
EQEQ = '=='
NOTEQ = '!='
+ EOF = 'EOF'
tokens = reservadas + literales + [
'{', '}', '(', ')', ',', '\'',