about | summary | refs | log | tree | commit | diff
path: root/lexer.py
diff options
context:
space:
mode:
author Iván Ávalos <avalos@disroot.org> 2022-11-08 09:07:39 -0600
committer Iván Ávalos <avalos@disroot.org> 2022-11-08 09:07:39 -0600
commit 2bd1a00abc55cff109f7baec2505a8d8d2e7d279 (patch)
tree dcc440ed99869eb8501cff3301c4c2255b727f3d /lexer.py
parent 54bc4ac44e300c24fa828ffda926bd4161bf912a (diff)
download javanol-2bd1a00abc55cff109f7baec2505a8d8d2e7d279.tar.gz
javanol-2bd1a00abc55cff109f7baec2505a8d8d2e7d279.tar.bz2
javanol-2bd1a00abc55cff109f7baec2505a8d8d2e7d279.zip
Se estructura lexer en clase, y arreglo en identificadores
Diffstat (limited to 'lexer.py')
-rw-r--r-- lexer.py 205
1 files changed, 109 insertions, 96 deletions
diff --git a/lexer.py b/lexer.py
index 8fff41a..0232645 100644
--- a/lexer.py
+++ b/lexer.py
@@ -6,142 +6,155 @@ op_simples_a = ['=', '+', '-', '&', '|']
op_simples_b = ['!', '<', '>']
op_simples = op_simples_a + op_simples_b
-def inicio_lexer(data):
+class Lexer:
tabla = TablaLex()
-
selector = ''
recol_string = ''
recol_caracter = ''
recol_comentario = ''
recol_operador = ''
recol_ident = ''
-
- for c in data + "\n":
+
+ def inicio_lexer(self, data):
+ for c in data + "\n":
+ r = self.procesar_caracter(c)
+ if r == 2: return
+ while r != 0:
+ print ('r = ' + str(r))
+ r = self.procesar_caracter(c)
+ if r == 2: return
+
+ # Imprimir tabla de símbolos
+ print (str(self.tabla))
+
+ # c => caracter
+ # return 0 => siguiente caracter
+ # return 1 => repetir caracter
+ # return 2 => error
+ def procesar_caracter(self, c):
if c != "\t" and c != "\n":
- print (c + ' (' + selector + ')')
-
- if selector == '':
+ print (c + ' (' + self.selector + ')')
+
+ if self.selector == '':
# Entradas a tokens
if c == '"':
- selector = 'STRING_LIT'
- continue
+ self.selector = 'STRING_LIT'
+ return 0
elif c == '\'':
- selector = 'CHAR_LIT'
- continue
+ self.selector = 'CHAR_LIT'
+ return 0
elif c.isalpha() or c == '_':
- selector = 'ID/RESERVADA'
+ self.selector = 'ID/RESERVADA'
elif c == '/':
- recol_comentario = '/'
- elif c in op_simples_a and recol_operador == '':
- recol_operador = c
- continue
+ self.recol_comentario = '/'
+ elif c in op_simples_a and self.recol_operador == '':
+ self.recol_operador = c
+ return 0
elif c in op_simples_b:
- recol_operador = c
- continue
+ self.recol_operador = c
+ return 0
elif (c == '{' or c == '}' or c == '(' or c == ')' or
- c == ',' or c == '.' or c == ';' or (c == '*' and recol_comentario == '')):
- tabla.insertar(LexToken(c, None, None, 1))
- continue
+ c == ',' or c == '.' or c == ';' or (c == '*' and self.recol_comentario == '')):
+ self.tabla.insertar(LexToken(c, None, None, 1))
+ return 0
# Apertura de comentario
- if recol_comentario == '/' and c == '*':
- selector = 'COMMENT'
- recol_comentario = ''
- continue
+ if self.recol_comentario == '/' and c == '*':
+ self.selector = 'COMMENT'
+ self.recol_comentario = ''
+ return 0
# Apertura de operador compuesto
- if len(recol_operador) > 0:
- rc = recol_operador + c
+ if len(self.recol_operador) > 0:
+ rc = self.recol_operador + c
if rc in op_compuestos:
# Operador compuesto
- tabla.insertar(LexToken(rc, None, None, 1))
- recol_operador = ''
- continue
+ self.tabla.insertar(LexToken(rc, None, None, 1))
+ self.recol_operador = ''
+ return 0
else:
# Operador simple
- tabla.insertar(LexToken(recol_operador, None, None, 1))
+ self.tabla.insertar(LexToken(self.recol_operador, None, None, 1))
if c in op_simples:
- tabla.insertar(LexToken(c, None, None, 1))
- recol_operador = ''
- continue
-
+ self.tabla.insertar(LexToken(c, None, None, 1))
+ self.recol_operador = ''
+ return 0
+
# Cadenas de texto
- if selector == 'STRING_LIT':
+ if self.selector == 'STRING_LIT':
if c == '"':
- tabla.insertar(LexToken('STRING_LIT', None, recol_string, 1))
- selector = ''
- recol_string = ''
+ self.tabla.insertar(LexToken('STRING_LIT', None, self.recol_string, 1))
+ self.selector = ''
+ self.recol_string = ''
else:
- recol_string += c
+ self.recol_string += c
# Caracteres
- if selector == 'CHAR_LIT':
- if len(recol_caracter) > 1:
+ if self.selector == 'CHAR_LIT':
+ if len(self.recol_caracter) > 1:
print ('Error: más de un caracter en una literal de caracter')
- break
+ return 2
if c == '\'':
- if len(recol_caracter) == 0:
+ if len(self.recol_caracter) == 0:
print ('Error: literal de caracter vacía')
- break
- tabla.insertar(LexToken('CHAR_LIT', None, recol_caracter, 1))
- selector = ''
- recol_caracter = ''
+ return 2
+ self.tabla.insertar(LexToken('CHAR_LIT', None, self.recol_caracter, 1))
+ self.selector = ''
+ self.recol_caracter = ''
else:
- recol_caracter += c
+ self.recol_caracter += c
# Comentarios
- if selector == 'COMMENT':
+ if self.selector == 'COMMENT':
if c == '*':
- recol_comentario = c
- elif recol_comentario == '*':
+ self.recol_comentario = c
+ elif self.recol_comentario == '*':
if c == '/':
- selector = ''
- recol_comentario = ''
+ self.selector = ''
+ self.recol_comentario = ''
else:
- recol_comentario = ''
+ self.recol_comentario = ''
# Identificador o palabra reservada
- if selector == 'ID/RESERVADA':
+ if self.selector == 'ID/RESERVADA':
if c.isalnum() or c == '_':
- recol_ident += c
+ self.recol_ident += c
else:
- if recol_ident == 'booleano':
- tabla.insertar(LexToken('BOOLEAN', None, None, 1))
- elif recol_ident == 'detener':
- tabla.insertar(LexToken('BREAK', None, None, 1))
- elif recol_ident == 'byte':
- tabla.insertar(LexToken('BYTE', None, None, 1))
- elif recol_ident == 'caracter':
- tabla.insertar(LexToken('CHAR', None, None, 1))
- elif recol_ident == 'doble':
- tabla.insertar(LexToken('DOUBLE', None, None, 1))
- elif recol_ident == 'sino':
- tabla.insertar(LexToken('ELSE', None, None, 1))
- elif recol_ident == 'porcada':
- tabla.insertar(LexToken('FOR', None, None, 1))
- elif recol_ident == 'si':
- tabla.insertar(LexToken('IF', None, None, 1))
- elif recol_ident == 'entero':
- tabla.insertar(LexToken('INT', None, None, 1))
- elif recol_ident == 'imprimir':
- tabla.insertar(LexToken('PRINT', None, None, 1))
- elif recol_ident == 'leer':
- tabla.insertar(LexToken('READ', None, None, 1))
- elif recol_ident == 'retorna':
- tabla.insertar(LexToken('RETURN', None, None, 1))
- elif recol_ident == 'vacio':
- tabla.insertar(LexToken('VOID', None, None, 1))
- elif recol_ident == 'mientras':
- tabla.insertar(LexToken('WHILE', None, None, 1))
- elif recol_ident == 'verdadero':
- tabla.insertar(LexToken('BOOLEAN_LIT', None, True, 1))
- elif recol_ident == 'falso':
- tabla.insertar(LexToken('BOOLEAN_LIT', None, False, 1))
+ if self.recol_ident == 'booleano':
+ self.tabla.insertar(LexToken('BOOLEAN', None, None, 1))
+ elif self.recol_ident == 'detener':
+ self.tabla.insertar(LexToken('BREAK', None, None, 1))
+ elif self.recol_ident == 'byte':
+ self.tabla.insertar(LexToken('BYTE', None, None, 1))
+ elif self.recol_ident == 'caracter':
+ self.tabla.insertar(LexToken('CHAR', None, None, 1))
+ elif self.recol_ident == 'doble':
+ self.tabla.insertar(LexToken('DOUBLE', None, None, 1))
+ elif self.recol_ident == 'sino':
+ self.tabla.insertar(LexToken('ELSE', None, None, 1))
+ elif self.recol_ident == 'porcada':
+ self.tabla.insertar(LexToken('FOR', None, None, 1))
+ elif self.recol_ident == 'si':
+ self.tabla.insertar(LexToken('IF', None, None, 1))
+ elif self.recol_ident == 'entero':
+ self.tabla.insertar(LexToken('INT', None, None, 1))
+ elif self.recol_ident == 'imprimir':
+ self.tabla.insertar(LexToken('PRINT', None, None, 1))
+ elif self.recol_ident == 'leer':
+ self.tabla.insertar(LexToken('READ', None, None, 1))
+ elif self.recol_ident == 'retorna':
+ self.tabla.insertar(LexToken('RETURN', None, None, 1))
+ elif self.recol_ident == 'vacio':
+ self.tabla.insertar(LexToken('VOID', None, None, 1))
+ elif self.recol_ident == 'mientras':
+ self.tabla.insertar(LexToken('WHILE', None, None, 1))
+ elif self.recol_ident == 'verdadero':
+ self.tabla.insertar(LexToken('BOOLEAN_LIT', None, True, 1))
+ elif self.recol_ident == 'falso':
+ self.tabla.insertar(LexToken('BOOLEAN_LIT', None, False, 1))
else:
- tabla.insertar(LexToken('IDENT', recol_ident, None, 1))
- recol_ident = ''
- selector = ''
-
-
- # Imprimir tabla de símbolos
- print (str(tabla))
+ self.tabla.insertar(LexToken('IDENT', self.recol_ident, None, 1))
+ self.recol_ident = ''
+ self.selector = ''
+ return 1
+ return 0