diff options
author | Iván Ávalos <avalos@disroot.org> | 2022-11-08 09:07:39 -0600 |
---|---|---|
committer | Iván Ávalos <avalos@disroot.org> | 2022-11-08 09:07:39 -0600 |
commit | 2bd1a00abc55cff109f7baec2505a8d8d2e7d279 (patch) | |
tree | dcc440ed99869eb8501cff3301c4c2255b727f3d /lexer.py | |
parent | 54bc4ac44e300c24fa828ffda926bd4161bf912a (diff) | |
download | javanol-2bd1a00abc55cff109f7baec2505a8d8d2e7d279.tar.gz javanol-2bd1a00abc55cff109f7baec2505a8d8d2e7d279.tar.bz2 javanol-2bd1a00abc55cff109f7baec2505a8d8d2e7d279.zip |
Se estructura lexer en clase, y arreglo en identificadores
Diffstat (limited to 'lexer.py')
-rw-r--r-- | lexer.py | 205 |
1 files changed, 109 insertions, 96 deletions
@@ -6,142 +6,155 @@ op_simples_a = ['=', '+', '-', '&', '|'] op_simples_b = ['!', '<', '>'] op_simples = op_simples_a + op_simples_b -def inicio_lexer(data): +class Lexer: tabla = TablaLex() - selector = '' recol_string = '' recol_caracter = '' recol_comentario = '' recol_operador = '' recol_ident = '' - - for c in data + "\n": + + def inicio_lexer(self, data): + for c in data + "\n": + r = self.procesar_caracter(c) + if r == 2: return + while r != 0: + print ('r = ' + str(r)) + r = self.procesar_caracter(c) + if r == 2: return + + # Imprimir tabla de símbolos + print (str(self.tabla)) + + # c => caracter + # return 0 => siguiente caracter + # return 1 => repetir caracter + # return 2 => error + def procesar_caracter(self, c): if c != "\t" and c != "\n": - print (c + ' (' + selector + ')') - - if selector == '': + print (c + ' (' + self.selector + ')') + + if self.selector == '': # Entradas a tokens if c == '"': - selector = 'STRING_LIT' - continue + self.selector = 'STRING_LIT' + return 0 elif c == '\'': - selector = 'CHAR_LIT' - continue + self.selector = 'CHAR_LIT' + return 0 elif c.isalpha() or c == '_': - selector = 'ID/RESERVADA' + self.selector = 'ID/RESERVADA' elif c == '/': - recol_comentario = '/' - elif c in op_simples_a and recol_operador == '': - recol_operador = c - continue + self.recol_comentario = '/' + elif c in op_simples_a and self.recol_operador == '': + self.recol_operador = c + return 0 elif c in op_simples_b: - recol_operador = c - continue + self.recol_operador = c + return 0 elif (c == '{' or c == '}' or c == '(' or c == ')' or - c == ',' or c == '.' or c == ';' or (c == '*' and recol_comentario == '')): - tabla.insertar(LexToken(c, None, None, 1)) - continue + c == ',' or c == '.' or c == ';' or (c == '*' and self.recol_comentario == '')): + self.tabla.insertar(LexToken(c, None, None, 1)) + return 0 # Apertura de comentario - if recol_comentario == '/' and c == '*': - selector = 'COMMENT' - recol_comentario = '' - continue + if self.recol_comentario == '/' and c == '*': + self.selector = 'COMMENT' + self.recol_comentario = '' + return 0 # Apertura de operador compuesto - if len(recol_operador) > 0: - rc = recol_operador + c + if len(self.recol_operador) > 0: + rc = self.recol_operador + c if rc in op_compuestos: # Operador compuesto - tabla.insertar(LexToken(rc, None, None, 1)) - recol_operador = '' - continue + self.tabla.insertar(LexToken(rc, None, None, 1)) + self.recol_operador = '' + return 0 else: # Operador simple - tabla.insertar(LexToken(recol_operador, None, None, 1)) + self.tabla.insertar(LexToken(self.recol_operador, None, None, 1)) if c in op_simples: - tabla.insertar(LexToken(c, None, None, 1)) - recol_operador = '' - continue - + self.tabla.insertar(LexToken(c, None, None, 1)) + self.recol_operador = '' + return 0 + # Cadenas de texto - if selector == 'STRING_LIT': + if self.selector == 'STRING_LIT': if c == '"': - tabla.insertar(LexToken('STRING_LIT', None, recol_string, 1)) - selector = '' - recol_string = '' + self.tabla.insertar(LexToken('STRING_LIT', None, self.recol_string, 1)) + self.selector = '' + self.recol_string = '' else: - recol_string += c + self.recol_string += c # Caracteres - if selector == 'CHAR_LIT': - if len(recol_caracter) > 1: + if self.selector == 'CHAR_LIT': + if len(self.recol_caracter) > 1: print ('Error: más de un caracter en una literal de caracter') - break + return 2 if c == '\'': - if len(recol_caracter) == 0: + if len(self.recol_caracter) == 0: print ('Error: literal de caracter vacía') - break - tabla.insertar(LexToken('CHAR_LIT', None, recol_caracter, 1)) - selector = '' - recol_caracter = '' + return 2 + self.tabla.insertar(LexToken('CHAR_LIT', None, self.recol_caracter, 1)) + self.selector = '' + self.recol_caracter = '' else: - recol_caracter += c + self.recol_caracter += c # Comentarios - if selector == 'COMMENT': + if self.selector == 'COMMENT': if c == '*': - recol_comentario = c - elif recol_comentario == '*': + self.recol_comentario = c + elif self.recol_comentario == '*': if c == '/': - selector = '' - recol_comentario = '' + self.selector = '' + self.recol_comentario = '' else: - recol_comentario = '' + self.recol_comentario = '' # Identificador o palabra reservada - if selector == 'ID/RESERVADA': + if self.selector == 'ID/RESERVADA': if c.isalnum() or c == '_': - recol_ident += c + self.recol_ident += c else: - if recol_ident == 'booleano': - tabla.insertar(LexToken('BOOLEAN', None, None, 1)) - elif recol_ident == 'detener': - tabla.insertar(LexToken('BREAK', None, None, 1)) - elif recol_ident == 'byte': - tabla.insertar(LexToken('BYTE', None, None, 1)) - elif recol_ident == 'caracter': - tabla.insertar(LexToken('CHAR', None, None, 1)) - elif recol_ident == 'doble': - tabla.insertar(LexToken('DOUBLE', None, None, 1)) - elif recol_ident == 'sino': - tabla.insertar(LexToken('ELSE', None, None, 1)) - elif recol_ident == 'porcada': - tabla.insertar(LexToken('FOR', None, None, 1)) - elif recol_ident == 'si': - tabla.insertar(LexToken('IF', None, None, 1)) - elif recol_ident == 'entero': - tabla.insertar(LexToken('INT', None, None, 1)) - elif recol_ident == 'imprimir': - tabla.insertar(LexToken('PRINT', None, None, 1)) - elif recol_ident == 'leer': - tabla.insertar(LexToken('READ', None, None, 1)) - elif recol_ident == 'retorna': - tabla.insertar(LexToken('RETURN', None, None, 1)) - elif recol_ident == 'vacio': - tabla.insertar(LexToken('VOID', None, None, 1)) - elif recol_ident == 'mientras': - tabla.insertar(LexToken('WHILE', None, None, 1)) - elif recol_ident == 'verdadero': - tabla.insertar(LexToken('BOOLEAN_LIT', None, True, 1)) - elif recol_ident == 'falso': - tabla.insertar(LexToken('BOOLEAN_LIT', None, False, 1)) + if self.recol_ident == 'booleano': + self.tabla.insertar(LexToken('BOOLEAN', None, None, 1)) + elif self.recol_ident == 'detener': + self.tabla.insertar(LexToken('BREAK', None, None, 1)) + elif self.recol_ident == 'byte': + self.tabla.insertar(LexToken('BYTE', None, None, 1)) + elif self.recol_ident == 'caracter': + self.tabla.insertar(LexToken('CHAR', None, None, 1)) + elif self.recol_ident == 'doble': + self.tabla.insertar(LexToken('DOUBLE', None, None, 1)) + elif self.recol_ident == 'sino': + self.tabla.insertar(LexToken('ELSE', None, None, 1)) + elif self.recol_ident == 'porcada': + self.tabla.insertar(LexToken('FOR', None, None, 1)) + elif self.recol_ident == 'si': + self.tabla.insertar(LexToken('IF', None, None, 1)) + elif self.recol_ident == 'entero': + self.tabla.insertar(LexToken('INT', None, None, 1)) + elif self.recol_ident == 'imprimir': + self.tabla.insertar(LexToken('PRINT', None, None, 1)) + elif self.recol_ident == 'leer': + self.tabla.insertar(LexToken('READ', None, None, 1)) + elif self.recol_ident == 'retorna': + self.tabla.insertar(LexToken('RETURN', None, None, 1)) + elif self.recol_ident == 'vacio': + self.tabla.insertar(LexToken('VOID', None, None, 1)) + elif self.recol_ident == 'mientras': + self.tabla.insertar(LexToken('WHILE', None, None, 1)) + elif self.recol_ident == 'verdadero': + self.tabla.insertar(LexToken('BOOLEAN_LIT', None, True, 1)) + elif self.recol_ident == 'falso': + self.tabla.insertar(LexToken('BOOLEAN_LIT', None, False, 1)) else: - tabla.insertar(LexToken('IDENT', recol_ident, None, 1)) - recol_ident = '' - selector = '' - - - # Imprimir tabla de símbolos - print (str(tabla)) + self.tabla.insertar(LexToken('IDENT', self.recol_ident, None, 1)) + self.recol_ident = '' + self.selector = '' + return 1 + return 0 |