Skip to content

Commit 810357b

Browse files
committed
#60 'a or b c' expression now raises an exception
* add check in BooleanAlgebra.parse to raise a ParseError when two bare symbols follow each other without any keyword or operation * add optional tracing of what happens when the AST is built, enabled with a top-level TRACE_PARSE flag, to help with AST parsing debugging * add corresponding tests and improve ParseError tests to check for the error_code Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 619b14c commit 810357b

File tree

2 files changed

+103
-12
lines changed

2 files changed

+103
-12
lines changed

boolean/boolean.py

+56-3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
from __future__ import absolute_import
2424
from __future__ import unicode_literals
25+
from __future__ import print_function
2526

2627
import inspect
2728
import itertools
@@ -31,6 +32,8 @@
3132
except NameError:
3233
basestring = str # Python 3
3334

35+
# Set to True to enable tracing for parsing
36+
TRACE_PARSE = False
3437

3538
# Token types for standard operators and parens
3639
TOKEN_AND = 1
@@ -59,12 +62,14 @@
5962
PARSE_UNBALANCED_CLOSING_PARENS = 2
6063
PARSE_INVALID_EXPRESSION = 3
6164
PARSE_INVALID_NESTING = 4
65+
PARSE_INVALID_SYMBOL_SEQUENCE = 5
6266

6367
PARSE_ERRORS = {
6468
PARSE_UNKNOWN_TOKEN: 'Unknown token',
6569
PARSE_UNBALANCED_CLOSING_PARENS: 'Unbalanced parenthesis',
6670
PARSE_INVALID_EXPRESSION: 'Invalid expression',
67-
PARSE_INVALID_NESTING: 'Invalid expression nesting such as (AND xx)'
71+
PARSE_INVALID_NESTING: 'Invalid expression nesting such as (AND xx)',
72+
PARSE_INVALID_SYMBOL_SEQUENCE: 'Invalid symbols sequence such as (A B)',
6873
}
6974

7075

@@ -197,27 +202,54 @@ def parse(self, expr, simplify=False):
197202
else:
198203
tokenized = iter(expr)
199204

205+
if TRACE_PARSE:
206+
tokenized = list(tokenized)
207+
print('tokens:')
208+
map(print, tokenized)
209+
tokenized = iter(tokenized)
210+
200211
ast = [None, None]
201212

213+
def is_sym(_t):
214+
return _t == TOKEN_SYMBOL or isinstance(_t, Symbol)
215+
202216
prev = None
203217
for tok in tokenized:
218+
if TRACE_PARSE: print('\nprocessing token:', repr(tok))
204219
token, tokstr, position = tok
220+
221+
if prev:
222+
prev_token, _, _ = prev
223+
if is_sym(prev_token) and is_sym(token):
224+
raise ParseError(token, tokstr, position, PARSE_INVALID_SYMBOL_SEQUENCE)
225+
205226
if token == TOKEN_SYMBOL:
206227
ast.append(self.Symbol(tokstr))
228+
if TRACE_PARSE: print(' ast: token == TOKEN_SYMBOL: append new symbol', repr(ast))
229+
207230
elif isinstance(token, Symbol):
208231
ast.append(token)
232+
if TRACE_PARSE: print(' ast: isinstance(token, Symbol): append existing symbol', repr(ast))
209233

210234
elif token == TOKEN_TRUE:
211235
ast.append(self.TRUE)
236+
if TRACE_PARSE: print('ast4:', repr(ast))
237+
212238
elif token == TOKEN_FALSE:
213239
ast.append(self.FALSE)
240+
if TRACE_PARSE: print('ast5:', repr(ast))
214241

215242
elif token == TOKEN_NOT:
216243
ast = [ast, self.NOT]
244+
if TRACE_PARSE: print('ast6:', repr(ast))
245+
217246
elif token == TOKEN_AND:
218247
ast = self._start_operation(ast, self.AND, precedence)
248+
if TRACE_PARSE: print(' ast: token == TOKEN_AND: start_operation', repr(ast))
249+
219250
elif token == TOKEN_OR:
220251
ast = self._start_operation(ast, self.OR, precedence)
252+
if TRACE_PARSE: print(' ast: token == TOKEN_OR: start_operation', repr(ast))
221253

222254
elif token == TOKEN_LPAR:
223255
if prev:
@@ -227,13 +259,16 @@ def parse(self, expr, simplify=False):
227259
if ptoktype not in (TOKEN_NOT, TOKEN_AND, TOKEN_OR, TOKEN_LPAR):
228260
raise ParseError(token, tokstr, position, PARSE_INVALID_NESTING)
229261
ast = [ast, TOKEN_LPAR]
262+
230263
elif token == TOKEN_RPAR:
231264
while True:
232265
if ast[0] is None:
233266
raise ParseError(token, tokstr, position, PARSE_UNBALANCED_CLOSING_PARENS)
234267
if ast[1] is TOKEN_LPAR:
235268
ast[0].append(ast[2])
269+
if TRACE_PARSE: print('ast9:', repr(ast))
236270
ast = ast[0]
271+
if TRACE_PARSE: print('ast10:', repr(ast))
237272
break
238273

239274
if isinstance(ast[1], int):
@@ -246,10 +281,12 @@ def parse(self, expr, simplify=False):
246281

247282
subex = ast[1](*ast[2:])
248283
ast[0].append(subex)
284+
if TRACE_PARSE: print('ast11:', repr(ast))
249285
ast = ast[0]
286+
if TRACE_PARSE: print('ast12:', repr(ast))
250287
else:
251288
raise ParseError(token, tokstr, position, PARSE_UNKNOWN_TOKEN)
252-
289+
253290
prev = tok
254291

255292
try:
@@ -259,16 +296,21 @@ def parse(self, expr, simplify=False):
259296
if len(ast) != 3:
260297
raise ParseError(error_code=PARSE_INVALID_EXPRESSION)
261298
parsed = ast[2]
299+
if TRACE_PARSE: print('parsed1:', repr(parsed))
262300
else:
263301
parsed = ast[1](*ast[2:])
302+
if TRACE_PARSE: print('parsed2:', repr(parsed))
264303
break
265304
else:
266305
subex = ast[1](*ast[2:])
267306
ast[0].append(subex)
307+
if TRACE_PARSE: print('ast13:', repr(ast))
268308
ast = ast[0]
309+
if TRACE_PARSE: print('ast14:', repr(ast))
269310
except TypeError:
270311
raise ParseError(error_code=PARSE_INVALID_EXPRESSION)
271312

313+
if TRACE_PARSE: print('parsed3:', repr(parsed))
272314
if simplify:
273315
return parsed.simplify()
274316
return parsed
@@ -277,31 +319,42 @@ def _start_operation(self, ast, operation, precedence):
277319
"""
278320
Returns an AST where all operations of lower precedence are finalized.
279321
"""
322+
if TRACE_PARSE: print(' start_operation: ast, operation, precedence', repr(ast), repr(operation), repr(precedence))
280323
op_prec = precedence[operation]
281324
while True:
282325
if ast[1] is None: # [None, None, x]
326+
if TRACE_PARSE: print(' start_op: ast[1] is None:', repr(ast))
283327
ast[1] = operation
328+
if TRACE_PARSE: print(' --> start_op: ast[1] is None:', repr(ast))
284329
return ast
285330

286331
prec = precedence[ast[1]]
287332
if prec > op_prec: # op=&, [ast, |, x, y] -> [[ast, |, x], &, y]
333+
if TRACE_PARSE: print(' start_op: prec > op_prec:', repr(ast))
288334
ast = [ast, operation, ast.pop(-1)]
335+
if TRACE_PARSE: print(' --> start_op: prec > op_prec:', repr(ast))
289336
return ast
290337

291338
if prec == op_prec: # op=&, [ast, &, x] -> [ast, &, x]
339+
if TRACE_PARSE: print(' start_op: prec == op_prec:', repr(ast))
292340
return ast
293341

294342
if not (inspect.isclass(ast[1]) and issubclass(ast[1], Function)):
295343
# the top ast node should be a function subclass at this stage
296344
raise ParseError(error_code=PARSE_INVALID_NESTING)
297345

298346
if ast[0] is None: # op=|, [None, &, x, y] -> [None, |, x&y]
347+
if TRACE_PARSE: print(' start_op: ast[0] is None:', repr(ast))
299348
subexp = ast[1](*ast[2:])
300-
return [ast[0], operation, subexp]
349+
new_ast = [ast[0], operation, subexp]
350+
if TRACE_PARSE: print(' --> start_op: ast[0] is None:', repr(new_ast))
351+
return new_ast
301352

302353
else: # op=|, [[ast, &, x], ~, y] -> [ast, &, x, ~y]
354+
if TRACE_PARSE: print(' start_op: else:', repr(ast))
303355
ast[0].append(ast[1](*ast[2:]))
304356
ast = ast[0]
357+
if TRACE_PARSE: print(' --> start_op: else:', repr(ast))
305358

306359
def tokenize(self, expr):
307360
"""

boolean/test_boolean.py

+47-9
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,18 @@
77
Released under revised BSD license.
88
"""
99

10-
from __future__ import absolute_import, unicode_literals
11-
12-
import unittest
13-
from unittest.case import expectedFailure
10+
from __future__ import absolute_import
11+
from __future__ import unicode_literals
12+
from __future__ import print_function
13+
from boolean.boolean import PARSE_UNKNOWN_TOKEN
1414

1515
try:
1616
basestring # Python 2
1717
except NameError:
1818
basestring = str # Python 3
1919

20+
import unittest
21+
from unittest.case import expectedFailure
2022

2123
from boolean import BooleanAlgebra
2224
from boolean import ParseError
@@ -29,6 +31,9 @@
2931
from boolean import TOKEN_SYMBOL
3032
from boolean import TOKEN_LPAR
3133
from boolean import TOKEN_RPAR
34+
from boolean.boolean import PARSE_INVALID_SYMBOL_SEQUENCE
35+
from boolean.boolean import PARSE_INVALID_EXPRESSION
36+
from boolean.boolean import PARSE_INVALID_NESTING
3237

3338

3439
class BooleanAlgebraTestCase(unittest.TestCase):
@@ -281,13 +286,43 @@ def test_parse_raise_ParseError(self):
281286
]
282287

283288
for expr in invalid_expressions:
284-
print(expr)
285289
try:
286290
algebra.parse(expr)
287291
self.fail("Exception should be raised when parsing '%s'" % expr)
288-
except ParseError:
289-
pass
292+
except ParseError as pe:
293+
assert pe.error_code == PARSE_UNKNOWN_TOKEN
290294

295+
def test_parse_side_by_side_symbols_should_raise_exception_but_not(self):
296+
algebra = BooleanAlgebra()
297+
expr_str = 'a or b c'
298+
try:
299+
algebra.parse(expr_str)
300+
except ParseError as pe:
301+
assert pe.error_code == PARSE_INVALID_SYMBOL_SEQUENCE
302+
303+
def test_parse_side_by_side_symbols_should_raise_exception_but_not2(self):
304+
algebra = BooleanAlgebra()
305+
expr_str = '(a or b) c'
306+
try:
307+
algebra.parse(expr_str)
308+
except ParseError as pe:
309+
assert pe.error_code == PARSE_INVALID_EXPRESSION
310+
311+
def test_parse_side_by_side_symbols_raise_exception(self):
312+
algebra = BooleanAlgebra()
313+
expr_str = 'a b'
314+
try:
315+
algebra.parse(expr_str)
316+
except ParseError as pe:
317+
assert pe.error_code == PARSE_INVALID_SYMBOL_SEQUENCE
318+
319+
def test_parse_side_by_side_symbols_with_parens_raise_exception(self):
320+
algebra = BooleanAlgebra()
321+
expr_str = '(a) (b)'
322+
try:
323+
algebra.parse(expr_str)
324+
except ParseError as pe:
325+
assert pe.error_code == PARSE_INVALID_NESTING
291326

292327
class BaseElementTestCase(unittest.TestCase):
293328

@@ -783,8 +818,11 @@ def test_simplify_complex_expression_parsed_then_simplified(self):
783818

784819
def test_parse_invalid_nested_and_should_raise_a_proper_exception(self):
785820
algebra = BooleanAlgebra()
786-
test_expression_str = '''a (and b)'''
787-
self.assertRaises(ParseError, algebra.parse, test_expression_str)
821+
expr = '''a (and b)'''
822+
try:
823+
algebra.parse(expr)
824+
except ParseError as pe:
825+
assert pe.error_code == PARSE_INVALID_NESTING
788826

789827
def test_subtract(self):
790828
parse = BooleanAlgebra().parse

0 commit comments

Comments
 (0)