27
27
import inspect
28
28
import itertools
29
29
30
+ # Python 2 and 3
30
31
try :
31
- basestring # Python 2
32
+ basestring # NOQA
32
33
except NameError :
33
- basestring = str # Python 3
34
+ basestring = str # NOQA
34
35
35
36
# Set to True to enable tracing for parsing
36
37
TRACE_PARSE = False
63
64
PARSE_INVALID_EXPRESSION = 3
64
65
PARSE_INVALID_NESTING = 4
65
66
PARSE_INVALID_SYMBOL_SEQUENCE = 5
67
+ PARSE_INVALID_OPERATOR_SEQUENCE = 6
66
68
67
69
PARSE_ERRORS = {
68
70
PARSE_UNKNOWN_TOKEN : 'Unknown token' ,
69
71
PARSE_UNBALANCED_CLOSING_PARENS : 'Unbalanced parenthesis' ,
70
72
PARSE_INVALID_EXPRESSION : 'Invalid expression' ,
71
73
PARSE_INVALID_NESTING : 'Invalid expression nesting such as (AND xx)' ,
72
74
PARSE_INVALID_SYMBOL_SEQUENCE : 'Invalid symbols sequence such as (A B)' ,
75
+ PARSE_INVALID_OPERATOR_SEQUENCE : 'Invalid operator sequence without symbols such as AND OR or OR OR' ,
73
76
}
74
77
75
78
@@ -181,9 +184,9 @@ def parse(self, expr, simplify=False):
181
184
and the algebra configured Symbol type is used to create Symbol
182
185
instances from Symbol tokens.
183
186
184
- If `expr` is an iterable, it should contain 3-tuples of: (token ,
185
- token_string, position ). In this case, the `token ` can be a Symbol
186
- instance or one of the TOKEN_* types.
187
+ If `expr` is an iterable, it should contain 3-tuples of: (token_type ,
188
+ token_string, token_position ). In this case, the `token_type ` can be
189
+ a Symbol instance or one of the TOKEN_* constant types.
187
190
See the `tokenize()` method for detailed specification.
188
191
"""
189
192
@@ -200,64 +203,89 @@ def parse(self, expr, simplify=False):
200
203
map (print , tokenized )
201
204
tokenized = iter (tokenized )
202
205
206
+ # the abstract syntax tree for this expression that will be built as we
207
+ # process tokens
208
+ # the first two items are None
209
+ # symbol items are appended to this structure
203
210
ast = [None , None ]
204
211
205
212
def is_sym (_t ):
206
- return _t == TOKEN_SYMBOL or isinstance ( _t , Symbol )
213
+ return isinstance ( _t , Symbol ) or _t in ( TOKEN_TRUE , TOKEN_FALSE , TOKEN_SYMBOL )
207
214
208
- prev = None
209
- for token , tokstr , position in tokenized :
215
+ def is_operator (_t ):
216
+ return _t in (TOKEN_AND , TOKEN_OR )
217
+
218
+ prev_token = None
219
+ for token_type , token_string , token_position in tokenized :
210
220
if TRACE_PARSE :
211
- print ('\n processing token:' , repr (token ), repr (tokstr ), repr (position ))
221
+ print ('\n processing token_type:' , repr (token_type ), 'token_string:' , repr (token_string ), 'token_position:' , repr (token_position ))
222
+
223
+ if prev_token :
224
+ prev_token_type , _prev_token_string , _prev_token_position = prev_token
225
+ if TRACE_PARSE :
226
+ print (' prev_token:' , repr (prev_token ))
227
+
228
+ if is_sym (prev_token_type ) and (is_sym (token_type )): # or token_type == TOKEN_LPAR) :
229
+ raise ParseError (token_type , token_string , token_position , PARSE_INVALID_SYMBOL_SEQUENCE )
212
230
213
- if prev :
214
- prev_token , _ , _ = prev
215
- if is_sym (prev_token ) and is_sym (token ):
216
- raise ParseError (token , tokstr , position , PARSE_INVALID_SYMBOL_SEQUENCE )
231
+ if is_operator (prev_token_type ) and (is_operator (token_type ) or token_type == TOKEN_RPAR ):
232
+ raise ParseError (token_type , token_string , token_position , PARSE_INVALID_OPERATOR_SEQUENCE )
217
233
218
- if token == TOKEN_SYMBOL :
219
- ast .append (self .Symbol (tokstr ))
234
+ else :
235
+ if is_operator (token_type ):
236
+ raise ParseError (token_type , token_string , token_position , PARSE_INVALID_OPERATOR_SEQUENCE )
237
+
238
+ if token_type == TOKEN_SYMBOL :
239
+ ast .append (self .Symbol (token_string ))
220
240
if TRACE_PARSE :
221
- print (' ast: token == TOKEN_SYMBOL: append new symbol' , repr (ast ))
241
+ print (' ast: token_type is TOKEN_SYMBOL: append new symbol' , repr (ast ))
222
242
223
- elif isinstance (token , Symbol ):
224
- ast .append (token )
243
+ elif isinstance (token_type , Symbol ):
244
+ ast .append (token_type )
225
245
if TRACE_PARSE :
226
- print (' ast: isinstance(token, Symbol): append existing symbol' , repr (ast ))
246
+ print (' ast: token_type is Symbol): append existing symbol' , repr (ast ))
227
247
228
- elif token == TOKEN_TRUE :
248
+ elif token_type == TOKEN_TRUE :
229
249
ast .append (self .TRUE )
230
- if TRACE_PARSE : print ('ast4 :' , repr (ast ))
250
+ if TRACE_PARSE : print (' ast: token_type is TOKEN_TRUE :' , repr (ast ))
231
251
232
- elif token == TOKEN_FALSE :
252
+ elif token_type == TOKEN_FALSE :
233
253
ast .append (self .FALSE )
234
- if TRACE_PARSE : print ('ast5 :' , repr (ast ))
254
+ if TRACE_PARSE : print (' ast: token_type is TOKEN_FALSE :' , repr (ast ))
235
255
236
- elif token == TOKEN_NOT :
256
+ elif token_type == TOKEN_NOT :
237
257
ast = [ast , self .NOT ]
238
- if TRACE_PARSE : print ('ast6:' , repr (ast ))
258
+ if TRACE_PARSE : print (' ast: token_type is TOKEN_NOT:' , repr (ast ))
259
+
260
+ elif token_type == TOKEN_AND :
261
+ # if not prev_token or not is_sym(prev_token_type):
262
+ # raise ParseError(token_type, token_string, token_position, PARSE_INVALID_OPERATOR_SEQUENCE)
239
263
240
- elif token == TOKEN_AND :
241
264
ast = self ._start_operation (ast , self .AND , precedence )
242
- if TRACE_PARSE : print (' ast: token == TOKEN_AND: start_operation' , repr (ast ))
265
+ if TRACE_PARSE :
266
+ print (' ast:token_type is TOKEN_AND: start_operation' , ast )
267
+
268
+ elif token_type == TOKEN_OR :
269
+ # if not prev_token or not is_sym(prev_token_type):
270
+ # raise ParseError(token_type, token_string, token_position, PARSE_INVALID_OPERATOR_SEQUENCE)
243
271
244
- elif token == TOKEN_OR :
245
272
ast = self ._start_operation (ast , self .OR , precedence )
246
- if TRACE_PARSE : print (' ast: token == TOKEN_OR: start_operation' , repr (ast ))
273
+ if TRACE_PARSE :
274
+ print (' ast:token_type is TOKEN_OR: start_operation' , ast )
247
275
248
- elif token == TOKEN_LPAR :
249
- if prev :
250
- ptoktype , _ptokstr , _pposition = prev
276
+ elif token_type == TOKEN_LPAR :
277
+ if prev_token :
251
278
# Check that an opening parens is preceded by a function
252
279
# or an opening parens
253
- if ptoktype not in (TOKEN_NOT , TOKEN_AND , TOKEN_OR , TOKEN_LPAR ):
254
- raise ParseError (token , tokstr , position , PARSE_INVALID_NESTING )
280
+ if prev_token_type not in (TOKEN_NOT , TOKEN_AND , TOKEN_OR , TOKEN_LPAR ):
281
+ raise ParseError (token_type , token_string , token_position , PARSE_INVALID_NESTING )
255
282
ast = [ast , TOKEN_LPAR ]
256
283
257
- elif token == TOKEN_RPAR :
284
+ elif token_type == TOKEN_RPAR :
258
285
while True :
259
286
if ast [0 ] is None :
260
- raise ParseError (token , tokstr , position , PARSE_UNBALANCED_CLOSING_PARENS )
287
+ raise ParseError (token_type , token_string , token_position , PARSE_UNBALANCED_CLOSING_PARENS )
288
+
261
289
if ast [1 ] is TOKEN_LPAR :
262
290
ast [0 ].append (ast [2 ])
263
291
if TRACE_PARSE : print ('ast9:' , repr (ast ))
@@ -266,89 +294,98 @@ def is_sym(_t):
266
294
break
267
295
268
296
if isinstance (ast [1 ], int ):
269
- raise ParseError (token , tokstr , position , PARSE_UNBALANCED_CLOSING_PARENS )
297
+ raise ParseError (token_type , token_string , token_position , PARSE_UNBALANCED_CLOSING_PARENS )
270
298
271
299
# the parens are properly nested
272
300
# the top ast node should be a function subclass
273
301
if not (inspect .isclass (ast [1 ]) and issubclass (ast [1 ], Function )):
274
- raise ParseError (token , tokstr , position , PARSE_INVALID_NESTING )
302
+ raise ParseError (token_type , token_string , token_position , PARSE_INVALID_NESTING )
275
303
276
304
subex = ast [1 ](* ast [2 :])
277
305
ast [0 ].append (subex )
278
306
if TRACE_PARSE : print ('ast11:' , repr (ast ))
279
307
ast = ast [0 ]
280
308
if TRACE_PARSE : print ('ast12:' , repr (ast ))
281
309
else :
282
- raise ParseError (token , tokstr , position , PARSE_UNKNOWN_TOKEN )
310
+ raise ParseError (token_type , token_string , token_position , PARSE_UNKNOWN_TOKEN )
283
311
284
- prev = (token , tokstr , position )
312
+ prev_token = (token_type , token_string , token_position )
285
313
286
314
try :
287
315
while True :
288
316
if ast [0 ] is None :
317
+ if TRACE_PARSE : print ('ast[0] is None:' , repr (ast ))
289
318
if ast [1 ] is None :
319
+ if TRACE_PARSE : print (' ast[1] is None:' , repr (ast ))
290
320
if len (ast ) != 3 :
291
321
raise ParseError (error_code = PARSE_INVALID_EXPRESSION )
292
322
parsed = ast [2 ]
293
- if TRACE_PARSE : print ('parsed1:' , repr (parsed ))
323
+ if TRACE_PARSE : print (' parsed = ast[2]:' , repr (parsed ))
324
+
294
325
else :
326
+ # call the function in ast[1] with the rest of the ast as args
295
327
parsed = ast [1 ](* ast [2 :])
296
- if TRACE_PARSE : print ('parsed2 :' , repr (parsed ))
328
+ if TRACE_PARSE : print (' parsed = ast[1](*ast[2:]) :' , repr (parsed ))
297
329
break
298
330
else :
331
+ if TRACE_PARSE : print ('subex = ast[1](*ast[2:]):' , repr (ast ))
299
332
subex = ast [1 ](* ast [2 :])
300
333
ast [0 ].append (subex )
301
- if TRACE_PARSE : print ('ast13 :' , repr (ast ))
334
+ if TRACE_PARSE : print (' ast[0].append(subex) :' , repr (ast ))
302
335
ast = ast [0 ]
303
- if TRACE_PARSE : print ('ast14 :' , repr (ast ))
336
+ if TRACE_PARSE : print (' ast = ast[0] :' , repr (ast ))
304
337
except TypeError :
305
338
raise ParseError (error_code = PARSE_INVALID_EXPRESSION )
306
339
307
- if TRACE_PARSE : print ('parsed3:' , repr (parsed ))
308
340
if simplify :
309
341
return parsed .simplify ()
342
+
343
+ if TRACE_PARSE : print ('final parsed:' , repr (parsed ))
310
344
return parsed
311
345
312
346
def _start_operation(self, ast, operation, precedence):
    """
    Return the AST node under which the operands of `operation` are to be
    collected, finalizing pending sub-expressions while walking up the tree.

    `ast` is a nested list shaped as [parent, operation, arg, ...] where the
    root node has None as its parent. `operation` is the operation being
    started. `precedence` is a mapping indexed by operation: the value found
    for `operation` is compared with the value found for the operation held
    by the current node to decide whether to nest, reuse or finalize it.

    Raise a ParseError with the PARSE_INVALID_NESTING error code when a node
    that has to be finalized does not hold a Function subclass.
    """
    if TRACE_PARSE:
        print(' start_operation:', repr(operation), 'AST:', ast)

    def trace(message, node):
        # Emit one tracing line (message, then repr of the node) only when
        # parse tracing is enabled.
        if TRACE_PARSE:
            print(message, repr(node))

    wanted = precedence[operation]
    node = ast
    while True:
        current = node[1]

        if current is None:
            # [None, None, x]: this level has no operation yet, claim it
            trace(' start_op: ast[1] is None:', node)
            node[1] = operation
            trace(' --> start_op: ast[1] is None:', node)
            return node

        held = precedence[current]

        if held > wanted:
            # op=&, [ast, |, x, y] -> [[ast, |, x], &, y]
            # the last argument is moved down into a new nested node
            trace(' start_op: prec > op_prec:', node)
            last_arg = node.pop(-1)
            node = [node, operation, last_arg]
            trace(' --> start_op: prec > op_prec:', node)
            return node

        if held == wanted:
            # op=&, [ast, &, x] -> [ast, &, x]
            # same precedence: keep collecting arguments in this node
            trace(' start_op: prec == op_prec:', node)
            return node

        # from here on the current node must be finalized, which requires
        # that it holds a function subclass that can be called
        is_function = inspect.isclass(current) and issubclass(current, Function)
        if not is_function:
            raise ParseError(error_code=PARSE_INVALID_NESTING)

        parent = node[0]
        if parent is None:
            # op=|, [None, &, x, y] -> [None, |, x&y]
            trace(' start_op: ast[0] is None:', node)
            finalized = current(*node[2:])
            root = [parent, operation, finalized]
            trace(' --> start_op: ast[0] is None:', root)
            return root

        # op=|, [[ast, &, x], ~, y] -> [ast, &, x, ~y]
        # hand the finalized sub-expression to the parent and retry there
        trace(' start_op: else:', node)
        parent.append(current(*node[2:]))
        node = parent
        trace(' --> start_op: else:', node)
352
389
353
390
def tokenize (self , expr ):
354
391
"""
0 commit comments