Skip to content

Commit e984df4

Browse files
authored
Merge pull request #81 from bastikr/improve-error-checking
Improve error checking
2 parents 74796a4 + 309db2b commit e984df4

File tree

4 files changed

+180
-79
lines changed

4 files changed

+180
-79
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
/tmp/
77
/.cache/
88
/.eggs/
9+
/.pytest_cache/

boolean/boolean.py

+93-56
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@
2727
import inspect
2828
import itertools
2929

30+
# Python 2 and 3
3031
try:
31-
basestring # Python 2
32+
basestring # NOQA
3233
except NameError:
33-
basestring = str # Python 3
34+
basestring = str # NOQA
3435

3536
# Set to True to enable tracing for parsing
3637
TRACE_PARSE = False
@@ -63,13 +64,15 @@
6364
PARSE_INVALID_EXPRESSION = 3
6465
PARSE_INVALID_NESTING = 4
6566
PARSE_INVALID_SYMBOL_SEQUENCE = 5
67+
PARSE_INVALID_OPERATOR_SEQUENCE = 6
6668

6769
PARSE_ERRORS = {
6870
PARSE_UNKNOWN_TOKEN: 'Unknown token',
6971
PARSE_UNBALANCED_CLOSING_PARENS: 'Unbalanced parenthesis',
7072
PARSE_INVALID_EXPRESSION: 'Invalid expression',
7173
PARSE_INVALID_NESTING: 'Invalid expression nesting such as (AND xx)',
7274
PARSE_INVALID_SYMBOL_SEQUENCE: 'Invalid symbols sequence such as (A B)',
75+
PARSE_INVALID_OPERATOR_SEQUENCE: 'Invalid operator sequence without symbols such as AND OR or OR OR',
7376
}
7477

7578

@@ -181,9 +184,9 @@ def parse(self, expr, simplify=False):
181184
and the algebra configured Symbol type is used to create Symbol
182185
instances from Symbol tokens.
183186
184-
If `expr` is an iterable, it should contain 3-tuples of: (token,
185-
token_string, position). In this case, the `token` can be a Symbol
186-
instance or one of the TOKEN_* types.
187+
If `expr` is an iterable, it should contain 3-tuples of: (token_type,
188+
token_string, token_position). In this case, the `token_type` can be
189+
a Symbol instance or one of the TOKEN_* constant types.
187190
See the `tokenize()` method for detailed specification.
188191
"""
189192

@@ -200,64 +203,89 @@ def parse(self, expr, simplify=False):
200203
map(print, tokenized)
201204
tokenized = iter(tokenized)
202205

206+
# the abstract syntax tree for this expression that will be built as we
207+
# process tokens
208+
# the first two items are None
209+
# symbol items are appended to this structure
203210
ast = [None, None]
204211

205212
def is_sym(_t):
206-
return _t == TOKEN_SYMBOL or isinstance(_t, Symbol)
213+
return isinstance(_t, Symbol) or _t in (TOKEN_TRUE, TOKEN_FALSE, TOKEN_SYMBOL)
207214

208-
prev = None
209-
for token, tokstr, position in tokenized:
215+
def is_operator(_t):
216+
return _t in (TOKEN_AND, TOKEN_OR)
217+
218+
prev_token = None
219+
for token_type, token_string, token_position in tokenized:
210220
if TRACE_PARSE:
211-
print('\nprocessing token:', repr(token), repr(tokstr), repr(position))
221+
print('\nprocessing token_type:', repr(token_type), 'token_string:', repr(token_string), 'token_position:', repr(token_position))
222+
223+
if prev_token:
224+
prev_token_type, _prev_token_string, _prev_token_position = prev_token
225+
if TRACE_PARSE:
226+
print(' prev_token:', repr(prev_token))
227+
228+
if is_sym(prev_token_type) and (is_sym(token_type)): # or token_type == TOKEN_LPAR) :
229+
raise ParseError(token_type, token_string, token_position, PARSE_INVALID_SYMBOL_SEQUENCE)
212230

213-
if prev:
214-
prev_token, _, _ = prev
215-
if is_sym(prev_token) and is_sym(token):
216-
raise ParseError(token, tokstr, position, PARSE_INVALID_SYMBOL_SEQUENCE)
231+
if is_operator(prev_token_type) and (is_operator(token_type) or token_type == TOKEN_RPAR):
232+
raise ParseError(token_type, token_string, token_position, PARSE_INVALID_OPERATOR_SEQUENCE)
217233

218-
if token == TOKEN_SYMBOL:
219-
ast.append(self.Symbol(tokstr))
234+
else:
235+
if is_operator(token_type):
236+
raise ParseError(token_type, token_string, token_position, PARSE_INVALID_OPERATOR_SEQUENCE)
237+
238+
if token_type == TOKEN_SYMBOL:
239+
ast.append(self.Symbol(token_string))
220240
if TRACE_PARSE:
221-
print(' ast: token == TOKEN_SYMBOL: append new symbol', repr(ast))
241+
print(' ast: token_type is TOKEN_SYMBOL: append new symbol', repr(ast))
222242

223-
elif isinstance(token, Symbol):
224-
ast.append(token)
243+
elif isinstance(token_type, Symbol):
244+
ast.append(token_type)
225245
if TRACE_PARSE:
226-
print(' ast: isinstance(token, Symbol): append existing symbol', repr(ast))
246+
print(' ast: token_type is Symbol): append existing symbol', repr(ast))
227247

228-
elif token == TOKEN_TRUE:
248+
elif token_type == TOKEN_TRUE:
229249
ast.append(self.TRUE)
230-
if TRACE_PARSE: print('ast4:', repr(ast))
250+
if TRACE_PARSE: print(' ast: token_type is TOKEN_TRUE:', repr(ast))
231251

232-
elif token == TOKEN_FALSE:
252+
elif token_type == TOKEN_FALSE:
233253
ast.append(self.FALSE)
234-
if TRACE_PARSE: print('ast5:', repr(ast))
254+
if TRACE_PARSE: print(' ast: token_type is TOKEN_FALSE:', repr(ast))
235255

236-
elif token == TOKEN_NOT:
256+
elif token_type == TOKEN_NOT:
237257
ast = [ast, self.NOT]
238-
if TRACE_PARSE: print('ast6:', repr(ast))
258+
if TRACE_PARSE: print(' ast: token_type is TOKEN_NOT:', repr(ast))
259+
260+
elif token_type == TOKEN_AND:
261+
# if not prev_token or not is_sym(prev_token_type):
262+
# raise ParseError(token_type, token_string, token_position, PARSE_INVALID_OPERATOR_SEQUENCE)
239263

240-
elif token == TOKEN_AND:
241264
ast = self._start_operation(ast, self.AND, precedence)
242-
if TRACE_PARSE: print(' ast: token == TOKEN_AND: start_operation', repr(ast))
265+
if TRACE_PARSE:
266+
print(' ast:token_type is TOKEN_AND: start_operation', ast)
267+
268+
elif token_type == TOKEN_OR:
269+
# if not prev_token or not is_sym(prev_token_type):
270+
# raise ParseError(token_type, token_string, token_position, PARSE_INVALID_OPERATOR_SEQUENCE)
243271

244-
elif token == TOKEN_OR:
245272
ast = self._start_operation(ast, self.OR, precedence)
246-
if TRACE_PARSE: print(' ast: token == TOKEN_OR: start_operation', repr(ast))
273+
if TRACE_PARSE:
274+
print(' ast:token_type is TOKEN_OR: start_operation', ast)
247275

248-
elif token == TOKEN_LPAR:
249-
if prev:
250-
ptoktype, _ptokstr, _pposition = prev
276+
elif token_type == TOKEN_LPAR:
277+
if prev_token:
251278
# Check that an opening parens is preceded by a function
252279
# or an opening parens
253-
if ptoktype not in (TOKEN_NOT, TOKEN_AND, TOKEN_OR, TOKEN_LPAR):
254-
raise ParseError(token, tokstr, position, PARSE_INVALID_NESTING)
280+
if prev_token_type not in (TOKEN_NOT, TOKEN_AND, TOKEN_OR, TOKEN_LPAR):
281+
raise ParseError(token_type, token_string, token_position, PARSE_INVALID_NESTING)
255282
ast = [ast, TOKEN_LPAR]
256283

257-
elif token == TOKEN_RPAR:
284+
elif token_type == TOKEN_RPAR:
258285
while True:
259286
if ast[0] is None:
260-
raise ParseError(token, tokstr, position, PARSE_UNBALANCED_CLOSING_PARENS)
287+
raise ParseError(token_type, token_string, token_position, PARSE_UNBALANCED_CLOSING_PARENS)
288+
261289
if ast[1] is TOKEN_LPAR:
262290
ast[0].append(ast[2])
263291
if TRACE_PARSE: print('ast9:', repr(ast))
@@ -266,89 +294,98 @@ def is_sym(_t):
266294
break
267295

268296
if isinstance(ast[1], int):
269-
raise ParseError(token, tokstr, position, PARSE_UNBALANCED_CLOSING_PARENS)
297+
raise ParseError(token_type, token_string, token_position, PARSE_UNBALANCED_CLOSING_PARENS)
270298

271299
# the parens are properly nested
272300
# the top ast node should be a function subclass
273301
if not (inspect.isclass(ast[1]) and issubclass(ast[1], Function)):
274-
raise ParseError(token, tokstr, position, PARSE_INVALID_NESTING)
302+
raise ParseError(token_type, token_string, token_position, PARSE_INVALID_NESTING)
275303

276304
subex = ast[1](*ast[2:])
277305
ast[0].append(subex)
278306
if TRACE_PARSE: print('ast11:', repr(ast))
279307
ast = ast[0]
280308
if TRACE_PARSE: print('ast12:', repr(ast))
281309
else:
282-
raise ParseError(token, tokstr, position, PARSE_UNKNOWN_TOKEN)
310+
raise ParseError(token_type, token_string, token_position, PARSE_UNKNOWN_TOKEN)
283311

284-
prev = (token, tokstr, position)
312+
prev_token = (token_type, token_string, token_position)
285313

286314
try:
287315
while True:
288316
if ast[0] is None:
317+
if TRACE_PARSE: print('ast[0] is None:', repr(ast))
289318
if ast[1] is None:
319+
if TRACE_PARSE: print(' ast[1] is None:', repr(ast))
290320
if len(ast) != 3:
291321
raise ParseError(error_code=PARSE_INVALID_EXPRESSION)
292322
parsed = ast[2]
293-
if TRACE_PARSE: print('parsed1:', repr(parsed))
323+
if TRACE_PARSE: print(' parsed = ast[2]:', repr(parsed))
324+
294325
else:
326+
# call the function in ast[1] with the rest of the ast as args
295327
parsed = ast[1](*ast[2:])
296-
if TRACE_PARSE: print('parsed2:', repr(parsed))
328+
if TRACE_PARSE: print(' parsed = ast[1](*ast[2:]):', repr(parsed))
297329
break
298330
else:
331+
if TRACE_PARSE: print('subex = ast[1](*ast[2:]):', repr(ast))
299332
subex = ast[1](*ast[2:])
300333
ast[0].append(subex)
301-
if TRACE_PARSE: print('ast13:', repr(ast))
334+
if TRACE_PARSE: print(' ast[0].append(subex):', repr(ast))
302335
ast = ast[0]
303-
if TRACE_PARSE: print('ast14:', repr(ast))
336+
if TRACE_PARSE: print(' ast = ast[0]:', repr(ast))
304337
except TypeError:
305338
raise ParseError(error_code=PARSE_INVALID_EXPRESSION)
306339

307-
if TRACE_PARSE: print('parsed3:', repr(parsed))
308340
if simplify:
309341
return parsed.simplify()
342+
343+
if TRACE_PARSE: print('final parsed:', repr(parsed))
310344
return parsed
311345

312346
def _start_operation(self, ast, operation, precedence):
313347
"""
314348
Returns an AST where all operations of lower precedence are finalized.
315349
"""
316-
if TRACE_PARSE: print(' start_operation: ast, operation, precedence', repr(ast), repr(operation), repr(precedence))
350+
if TRACE_PARSE:
351+
print(' start_operation:', repr(operation), 'AST:', ast)
352+
317353
op_prec = precedence[operation]
318354
while True:
319-
if ast[1] is None: # [None, None, x]
320-
if TRACE_PARSE: print(' start_op: ast[1] is None:', repr(ast))
355+
if ast[1] is None:
356+
# [None, None, x]
357+
if TRACE_PARSE: print(' start_op: ast[1] is None:', repr(ast))
321358
ast[1] = operation
322-
if TRACE_PARSE: print(' --> start_op: ast[1] is None:', repr(ast))
359+
if TRACE_PARSE: print(' --> start_op: ast[1] is None:', repr(ast))
323360
return ast
324361

325362
prec = precedence[ast[1]]
326363
if prec > op_prec: # op=&, [ast, |, x, y] -> [[ast, |, x], &, y]
327-
if TRACE_PARSE: print(' start_op: prec > op_prec:', repr(ast))
364+
if TRACE_PARSE: print(' start_op: prec > op_prec:', repr(ast))
328365
ast = [ast, operation, ast.pop(-1)]
329-
if TRACE_PARSE: print(' --> start_op: prec > op_prec:', repr(ast))
366+
if TRACE_PARSE: print(' --> start_op: prec > op_prec:', repr(ast))
330367
return ast
331368

332369
if prec == op_prec: # op=&, [ast, &, x] -> [ast, &, x]
333-
if TRACE_PARSE: print(' start_op: prec == op_prec:', repr(ast))
370+
if TRACE_PARSE: print(' start_op: prec == op_prec:', repr(ast))
334371
return ast
335372

336373
if not (inspect.isclass(ast[1]) and issubclass(ast[1], Function)):
337374
# the top ast node should be a function subclass at this stage
338375
raise ParseError(error_code=PARSE_INVALID_NESTING)
339376

340377
if ast[0] is None: # op=|, [None, &, x, y] -> [None, |, x&y]
341-
if TRACE_PARSE: print(' start_op: ast[0] is None:', repr(ast))
378+
if TRACE_PARSE: print(' start_op: ast[0] is None:', repr(ast))
342379
subexp = ast[1](*ast[2:])
343380
new_ast = [ast[0], operation, subexp]
344-
if TRACE_PARSE: print(' --> start_op: ast[0] is None:', repr(new_ast))
381+
if TRACE_PARSE: print(' --> start_op: ast[0] is None:', repr(new_ast))
345382
return new_ast
346383

347384
else: # op=|, [[ast, &, x], ~, y] -> [ast, &, x, ~y]
348-
if TRACE_PARSE: print(' start_op: else:', repr(ast))
385+
if TRACE_PARSE: print(' start_op: else:', repr(ast))
349386
ast[0].append(ast[1](*ast[2:]))
350387
ast = ast[0]
351-
if TRACE_PARSE: print(' --> start_op: else:', repr(ast))
388+
if TRACE_PARSE: print(' --> start_op: else:', repr(ast))
352389

353390
def tokenize(self, expr):
354391
"""

0 commit comments

Comments
 (0)