1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
|
from enum import Enum
from symbol import LexToken, TablaLex, tokens
# Two-character operators recognised by the lexer.
op_compuestos = [
    ">=", "<=", "==", "!=",
    "&&", "||", "++", "--",
]

# Single-character operators, kept in two groups. Every one of them is also
# the first character of some entry in op_compuestos.
op_simples_a = ["=", "+", "-", "&", "|"]
op_simples_b = ["!", "<", ">"]

# All single-character operators: group "a" followed by group "b".
op_simples = list(op_simples_a)
op_simples.extend(op_simples_b)
class Lexer:
    """Character-at-a-time lexer that fills a ``TablaLex`` with ``LexToken`` items.

    The lexer is a small state machine. ``selector`` names the token currently
    being collected ('' when idle, or 'STRING_LIT', 'CHAR_LIT', 'COMMENT',
    'ID/RESERVADA'); the ``recol_*`` attributes accumulate the text of that
    token. Characters that match no rule (whitespace, digits outside an
    identifier, ...) are ignored while idle.
    """

    # NOTE(review): this is a class attribute, so every Lexer instance that
    # does not rebind ``tabla`` shares the same table object.
    tabla = TablaLex()
    selector = ''
    recol_string = ''
    recol_caracter = ''
    recol_comentario = ''
    recol_operador = ''
    recol_ident = ''

    # Reserved word -> (token type, token value).
    _RESERVADAS = {
        'booleano': ('BOOLEAN', None),
        'detener': ('BREAK', None),
        'byte': ('BYTE', None),
        'caracter': ('CHAR', None),
        'doble': ('DOUBLE', None),
        'sino': ('ELSE', None),
        'porcada': ('FOR', None),
        'si': ('IF', None),
        'entero': ('INT', None),
        'imprimir': ('PRINT', None),
        'leer': ('READ', None),
        'retorna': ('RETURN', None),
        'vacio': ('VOID', None),
        'mientras': ('WHILE', None),
        'verdadero': ('BOOLEAN_LIT', True),
        'falso': ('BOOLEAN_LIT', False),
    }

    # Single characters that are emitted as a token on their own.
    _PUNTUACION = ('{', '}', '(', ')', ',', '.', ';', '*')

    def inicio_lexer(self, data):
        """Tokenize ``data`` and print the resulting symbol table.

        A newline is appended so that a token still pending at the end of the
        input is flushed. Stops silently (without printing the table) as soon
        as ``procesar_caracter`` reports an error.
        """
        for c in data + "\n":
            r = self.procesar_caracter(c)
            # A non-zero result means either "error" (2) or "feed me the same
            # character again" (1).
            while r != 0:
                if r == 2:
                    return
                print('r = ' + str(r))
                r = self.procesar_caracter(c)
        # Print the symbol table
        print(str(self.tabla))

    def procesar_caracter(self, c):
        """Feed one character to the state machine.

        Returns:
            0 -- character consumed, continue with the next one
            1 -- the same character must be processed again
            2 -- lexical error (a message has been printed)
        """
        if c != "\t" and c != "\n":
            print(c + ' (' + self.selector + ')')

        if self.selector == '':
            if self._procesar_inicio(c):
                return 0
            # Not consumed: c started an identifier and is collected below.
        if self.selector == 'STRING_LIT':
            return self._procesar_string(c)
        if self.selector == 'CHAR_LIT':
            return self._procesar_caracter_literal(c)
        if self.selector == 'COMMENT':
            return self._procesar_comentario(c)
        if self.selector == 'ID/RESERVADA':
            return self._procesar_identificador(c)
        return 0

    def _insertar_token(self, tipo, nombre=None, valor=None):
        """Insert a token into the table.

        The second and third LexToken arguments are presumably the identifier
        name and the literal value; the fourth is always 1 (it looks like a
        line number -- confirm against LexToken).
        """
        self.tabla.insertar(LexToken(tipo, nombre, valor, 1))

    def _procesar_inicio(self, c):
        """Handle ``c`` while idle; return True when ``c`` was fully consumed.

        Returns False only when ``c`` starts an identifier, in which case the
        caller lets the identifier state collect it.
        """
        # Resolve a pending '/' first so it can never linger and turn an
        # unrelated, later '*' into a comment opener.
        if self.recol_comentario == '/':
            self.recol_comentario = ''
            if c == '*':
                self.selector = 'COMMENT'
                return True
            # NOTE(review): a lone '/' is emitted as its own token, like '*';
            # confirm that the token table accepts it.
            self._insertar_token('/')

        # Resolve a pending operator BEFORE dispatching c, so tokens keep
        # their source order and c is never lost or allowed to overwrite it.
        if self.recol_operador:
            pendiente = self.recol_operador
            self.recol_operador = ''
            if pendiente + c in op_compuestos:
                # Compound operator
                self._insertar_token(pendiente + c)
                return True
            # Simple operator; c is then handled like any other character.
            self._insertar_token(pendiente)

        # Token entry points
        if c == '"':
            self.selector = 'STRING_LIT'
            return True
        if c == '\'':
            self.selector = 'CHAR_LIT'
            return True
        if c == '/':
            # Either a comment opener or a lone slash; decided on next char.
            self.recol_comentario = '/'
            return True
        if c in op_simples:
            # May still become a compound operator; decided on next char.
            self.recol_operador = c
            return True
        if c in self._PUNTUACION:
            self._insertar_token(c)
            return True
        if c.isalpha() or c == '_':
            self.selector = 'ID/RESERVADA'
            return False
        # Anything else is ignored.
        return True

    def _procesar_string(self, c):
        """Collect a string literal until the closing double quote."""
        if c == '"':
            self._insertar_token('STRING_LIT', None, self.recol_string)
            self.selector = ''
            self.recol_string = ''
        else:
            self.recol_string += c
        return 0

    def _procesar_caracter_literal(self, c):
        """Collect a character literal; it must hold exactly one character."""
        if len(self.recol_caracter) > 1:
            print('Error: más de un caracter en una literal de caracter')
            return 2
        if c == '\'':
            if len(self.recol_caracter) == 0:
                print('Error: literal de caracter vacía')
                return 2
            self._insertar_token('CHAR_LIT', None, self.recol_caracter)
            self.selector = ''
            self.recol_caracter = ''
        else:
            self.recol_caracter += c
        return 0

    def _procesar_comentario(self, c):
        """Skip comment text until the closing '*/'."""
        if c == '*':
            self.recol_comentario = '*'
        elif self.recol_comentario == '*':
            if c == '/':
                self.selector = ''
            # Any character other than '*' cancels a pending '*', so only an
            # adjacent "*/" closes the comment.
            self.recol_comentario = ''
        return 0

    def _procesar_identificador(self, c):
        """Collect an identifier and classify it when it ends.

        Returns 1 on the terminating character so the caller feeds that
        character again, this time in the idle state.
        """
        if c.isalnum() or c == '_':
            self.recol_ident += c
            return 0

        reservada = self._RESERVADAS.get(self.recol_ident)
        if reservada is None:
            self._insertar_token('IDENT', self.recol_ident, None)
        else:
            tipo, valor = reservada
            self._insertar_token(tipo, None, valor)
        self.recol_ident = ''
        self.selector = ''
        return 1
|