-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.cpp
192 lines (184 loc) · 5.02 KB
/
lexer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#include <iostream>
#include <string>
#include <sstream>
#include <cctype>
#include "lexer.hpp"
namespace sre {
namespace console
{
//== Token ==//
/** Build a token that carries only a kind.
 * \param type  Kind of token to create.
 * The textual value is initialised to the empty string.
 **/
Token::Token ( TOKEN type )
    : type(type), value("") {
}
/** Build a token with both a kind and a textual value.
 * \param type   Kind of token to create.
 * \param value  Text associated with the token (copied into the token).
 **/
Token::Token ( TOKEN type, std::string value )
    : type(type), value(value) {
}
std::string Token::toString () {
return value;
}
/** Compare two tokens by kind only; the textual values are ignored.
 * \param rarg  Token to compare against.
 * \return true when both tokens have the same type.
 **/
bool Token::operator== ( Token rarg ) {
    const bool sameKind = (rarg.type == type);
    return sameKind;
}
/** Check whether this token is of the given kind.
 * \param type  Token kind to compare against.
 * \return true when this token's type equals \p type.
 **/
bool Token::operator== ( Token::TOKEN type ) {
    // The parameter shadows the member, hence the explicit this->.
    return type == this->type;
}
/** Negation of the token/token equality: compares kinds only.
 * \param rarg  Token to compare against.
 * \return true when the two tokens have different types.
 **/
bool Token::operator!= ( Token rarg ) {
    return !(this->type == rarg.type);
}
/** Check whether this token is NOT of the given kind.
 * \param type  Token kind to compare against.
 * \return true when this token's type differs from \p type.
 **/
bool Token::operator!= ( Token::TOKEN type ) {
    // The parameter shadows the member, hence the explicit this->.
    return !(this->type == type);
}
std::string Token::name () {
switch(type)
{
case IDENTIFIER: return "identifier";
case SEPARATOR: return "separator";
case INT_LIT: return "integer literal";
case FLOAT_LIT: return "float literal";
case CHAR_LIT: return "character literal";
case STRING_LIT: return "string literal";
case END_OF_INPUT: return "end-of-input";
case ERROR: return "token error";
default: return "missing token identifier";
}
}
//== Lexer ==//
//// ctor ////
/** Copy constructor.
 * Allocates a fresh input stream holding the same text as the source
 * lexer's stream and resets the look-ahead character to a blank, so the
 * copy starts reading from the beginning of that text.
 * \param lex  Lexer whose input text is duplicated.
 * NOTE(review): the class owns a heap-allocated stream; confirm that copy
 * assignment is handled in the header as well.
 **/
Lexer::Lexer ( const Lexer& lex) {
    const std::string source = lex.input->str();
    ch = ' ';
    input = new std::istringstream(source);
}
/** Create a lexer over the given text.
 * \param input  Text to tokenize; it is copied into a heap-allocated
 *               string stream owned by the lexer.
 * The look-ahead character starts as a blank so the first call to next()
 * begins by skipping whitespace and reading the first real character.
 **/
Lexer::Lexer ( std::string input ) {
    std::istringstream* stream = new std::istringstream(input);
    this->ch = ' ';
    this->input = stream;   // parameter shadows the member, hence this->
}
//// dtor ////
/** Release the input stream allocated by the constructors. **/
Lexer::~Lexer () {
    delete input;
}
//// state methods ////
/** Skip whitespace.
 * Keeps reading characters into the look-ahead while the stream is still
 * good and the look-ahead is a whitespace character. On return the
 * look-ahead holds the first non-whitespace character, or whatever get()
 * produced when the stream ran out.
 **/
void Lexer::space () {
    for (;;) {
        if (!input->good()) {
            return;
        }
        if (!std::isspace(ch)) {
            return;
        }
        ch = input->get();
    }
}
/** Lex an identifier starting at the current look-ahead character.
 * The current character is taken unconditionally (next() only dispatches
 * here on a letter); every following alphanumeric character is appended.
 * On return the look-ahead holds the first character after the identifier.
 * \return A Token::IDENTIFIER whose value is the collected text.
 **/
Token Lexer::identifier () {
    std::string str;
    do {
        str.push_back(ch);
        ch = this->input->get();
        // <cctype> only guarantees the std:: names, so qualify the call
        // like the rest of this file does.
    } while ( input->good() && std::isalnum(ch) );     // [a-zA-Z0-9]
    return Token(Token::IDENTIFIER, str);
}
/** Lex a numeric literal starting at the current look-ahead digit.
 * Collects the leading run of digits, then decides by the character that
 * follows it:
 *  - '.'      : hands the digits to floatLit() to finish a float literal;
 *  - a letter : the literal is malformed, a Token::ERROR is returned
 *               (the offending letter is left in the look-ahead);
 *  - otherwise: the digits form an integer literal.
 * \return Token::INT_LIT, Token::FLOAT_LIT or Token::ERROR as described.
 **/
Token Lexer::numLit () {
    std::string str;
    do {
        str.push_back(ch);
        ch = this->input->get();
        // <cctype> only guarantees the std:: names, so qualify the call
        // like the rest of this file does.
    } while ( input->good() && std::isdigit(this->ch) );

    if ( ch == '.' ) {                      // decimal point
        return floatLit(str);               // must be a float literal
    }
    else if ( std::isalpha(ch) ) {
        return Token(Token::ERROR, "invalid character after number");
    }
    else {
        return Token(Token::INT_LIT, str);  // must be an int literal
    }
}
/** Finish lexing a float literal; the look-ahead is the decimal point.
 * \param lh  Text already collected to the left of the decimal point.
 * The decimal point is appended to \p lh, followed by the run of digits
 * after it. When no digit follows the point a single '0' is inserted, so
 * "1." yields "1.0".
 * \return Token::FLOAT_LIT with the assembled text, or Token::ERROR when
 *         a letter directly follows the number.
 **/
Token Lexer::floatLit (std::string lh) {
    std::string text(lh);

    // Consume the decimal point itself.
    text.push_back(ch);
    ch = input->get();

    // Pad an empty fractional part so the literal always has one digit.
    const bool hasFraction = (std::isdigit(ch) != 0);
    if (!hasFraction) {
        text.push_back('0');
    }

    for (; input->good() && std::isdigit(ch); ch = input->get()) {
        text.push_back(ch);
    }

    if (std::isalpha(ch)) {
        return Token(Token::ERROR, "invalid character after number");
    }
    return Token(Token::FLOAT_LIT, text);
}
/** Lex a quoted literal; the look-ahead is the opening single quote.
 * Outcomes, depending on what follows the opening quote:
 *  - an immediate closing quote      : empty Token::STRING_LIT;
 *  - one character and a closing quote: Token::CHAR_LIT;
 *  - more than one character         : continues in stringLit();
 *  - input runs out first            : Token::ERROR.
 * \return The token described above.
 **/
Token Lexer::charLit () {
    std::string text;

    ch = input->get();                      // step past the opening quote
    if (input->good()) {
        if (ch == '\'') {                   // empty can only be a string literal
            ch = input->get();
            return Token(Token::STRING_LIT, text);
        }
        // First character of the literal.
        text.push_back(ch);
        ch = input->get();
    }

    if (ch == '\'') {                       // terminating single quote
        ch = input->get();
        return Token(Token::CHAR_LIT, text);
    }
    if (!input->good()) {
        return Token(Token::ERROR,
                     "character literal missing end [\']");
    }
    return stringLit(text);
}
/** Finish lexing a string literal.
 * \param lh  Characters of the literal that were already consumed.
 * The current look-ahead character is appended unconditionally, then
 * characters are collected until a single quote is seen or the stream
 * stops being good.
 * \return Token::STRING_LIT with the collected text when the closing
 *         quote was found (it is consumed), otherwise Token::ERROR.
 **/
Token Lexer::stringLit (std::string lh) {
    std::string text(lh);

    // The caller guarantees the look-ahead belongs to the literal.
    text.push_back(ch);
    ch = input->get();

    while (input->good() && ch != '\'') {
        text.push_back(ch);
        ch = input->get();
    }

    if (ch != '\'') {
        return Token(Token::ERROR,
                     "string literal missing end [\']");
    }
    ch = input->get();                      // consume terminating single quote
    return Token(Token::STRING_LIT, text);
}
/** Lex a one-character separator held in the look-ahead.
 * The look-ahead is advanced to the character after the separator.
 * \return Token::SEPARATOR whose value is that single character.
 **/
Token Lexer::separator () {
    const std::string text(1, static_cast<char>(ch));
    ch = input->get();
    return Token(Token::SEPARATOR, text);
}
/** Get next token
 * Skips leading whitespace, then dispatches on the look-ahead character:
 * letter -> identifier, digit -> numeric literal, '.' -> float literal
 * with an implied leading "0", single quote -> character/string literal,
 * ';' or ',' -> separator.
 * \return The next token from the Lexer's input stream.
 *         Token::END_OF_INPUT is returned when the look-ahead equals EOF.
 *         Token::ERROR (with a message as its value) is returned for a
 *         malformed literal or for a character that cannot start any
 *         token; in the latter case the offending character is consumed
 *         so that subsequent calls make progress.
 **/
Token Lexer::next () {
    // skip white space
    if (std::isspace(ch)) {                 // [ \f\n\r\t\v]
        space();
    }

    if (ch == EOF) {
        return Token(Token::END_OF_INPUT);
    }
    // is identifier
    if (std::isalpha(ch)) {                 // [a-zA-Z]
        return identifier();
    }
    // is numeric literal
    if (std::isdigit(ch)) {                 // [0-9]
        return numLit();
    }
    if (ch == '.') {
        return floatLit("0");
    }
    // is char or string literal
    if (ch == '\'') {
        return charLit();
    }
    // is separator
    if (ch == ';' || ch == ',') {
        return separator();
    }

    // No token can start with this character.
    std::string str = "invalid character [";
    str.push_back(ch);
    str.append("] for token");
    // Step past the bad character; otherwise every later call would see
    // the same look-ahead and report the same error forever.
    ch = this->input->get();
    return Token(Token::ERROR, str);
}
}
}