#=========================================================================
# This is OPEN SOURCE SOFTWARE governed by the GNU General Public
# License (GPL) version 3, as described at www.opensource.org.
# Copyright (C)2016 William H. Majoros ([email protected]).
#=========================================================================
from __future__ import (absolute_import, division, print_function,
    unicode_literals, generators, nested_scopes, with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii,
    chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
from EssexToken import EssexToken
import re
######################################################################
#
# A token scanner for the EssexParser.
#
# Attributes:
# file : file handle
# ungot : char
# nextTok : EssexToken
# Public Methods:
# scanner=EssexScanner(filehandle)
# scanner.close()
# token=scanner.nextToken()
# token=scanner.match(tokenType)
# token=scanner.peek()
# Private Methods:
# getChar
# unGetChar
# skipWhitespace
######################################################################
class EssexScanner:
    def __init__(self,file):
        self.file=file
        self.nextTok=None   # one-token lookahead buffer
        self.ungot=None     # one-character pushback buffer
    def close(self):
        self.file.close()
    def nextToken(self):
        # Consume and return the next token (None at end of input)
        token=self.peek()
        self.nextTok=None
        return token
    def match(self,tokenType):
        # Consume the next token and require it to be of the given type
        token=self.nextToken()
        if(token.getType()!=tokenType):
            lexeme=token.getLexeme()
            raise Exception("Syntax error near \""+lexeme+"\"")
        return token
    def peek(self):
        # Return the next token without consuming it
        if(not self.nextTok):
            self.skipWhitespace()
            c=self.getChar()
            if(c is None): return None
            tokenType=None
            lexeme=""
            if(c=="(" or c==")"): tokenType=c
            else:
                tokenType="L"   # literal
                lexeme=c
                while(True):
                    c=self.getChar()
                    if(not c): break
                    if(re.search(r"[\s()]",c)): break
                    lexeme+=c
                self.unGetChar(c)   # push back the delimiter
                # Decode the escapes used for characters special to Essex
                lexeme=lexeme.replace("&lparen;","(")
                lexeme=lexeme.replace("&rparen;",")")
                lexeme=lexeme.replace("&tab;","\t")
                lexeme=lexeme.replace("&space;"," ")
            self.nextTok=EssexToken(tokenType,lexeme)
        return self.nextTok
    def getChar(self):
        c=None
        if(self.ungot):
            c=self.ungot    # return the pushed-back character first
            self.ungot=None
        else:
            c=self.file.read(1)
            if(len(c)==0): c=None   # end of input
        return c
    def unGetChar(self,c):
        self.ungot=c
    def skipWhitespace(self):
        while(True):
            c=self.getChar()
            if(c is None): break
            if(c=="#"):
                # Skip a comment that runs to the end of the line
                while(True):
                    c=self.getChar()
                    if(c is None or c=="\n"): break
                continue
            if(not re.search(r"\s",c)):
                self.unGetChar(c)
                return
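######################################################################
# A minimal usage sketch: the scanner only needs an object with read(1)
# and close(), so io.StringIO can stand in for a file handle here, and
# the input string is just illustrative Essex-style text. Token types
# are "(", ")" and "L" (literal); &lparen;/&rparen;/&tab;/&space;
# escapes are decoded inside literals.
if __name__=="__main__":
    import io
    scanner=EssexScanner(io.StringIO("(gene (id &lparen;abc&rparen;) exon)"))
    while(True):
        token=scanner.nextToken()
        if(token is None): break
        print(token.getType(),token.getLexeme())
    scanner.close()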