@@ -234,11 +234,47 @@ class Lexer
234
234
// Abstract source of input for the lexer.  A derived class supplies raw
// bytes through next_byte (); init () drains them into `chars`, and next ()
// then hands the buffered values out one at a time.
class InputSource
{
private:
  // Index into `chars` of the value the next call to next () will return.
  size_t pos;
  // Every value read from the underlying source by init ().
  std::vector<int> chars;

  // Return the next raw byte of the underlying source, or EOF once it is
  // exhausted.  Implemented by each concrete source.
  virtual int next_byte () = 0;

public:
  InputSource () : pos (0), chars () {}

  virtual ~InputSource () {}

  // Return the next buffered value and advance past it, or EOF when every
  // buffered value has been consumed.  This is public because code outside
  // the class hierarchy has to be able to pull input from the source.
  int next ()
  {
    if (pos >= chars.size ())
      return EOF;

    return chars[pos++];
  }

  // Buffer the whole input into `chars` by calling next_byte () until it
  // reports EOF.  No utf-8 validation is performed yet (see the TODOs).
  // Nothing is buffered until this has been called, so next () returns EOF
  // before then.
  void init ()
  {
    // TODO skip UTF BOM
    // TODO validate utf-8 encoding and push one codepoint to `chars`
    for (int c = next_byte (); c != EOF; c = next_byte ())
      chars.push_back (c);
  }
};
243
279
244
280
class FileInputSource : public InputSource
@@ -247,11 +283,15 @@ class Lexer
247
283
// Input source file.
248
284
FILE *input;
249
285
286
+ int next_byte () override { return fgetc (input); }
287
+
250
288
public:
251
289
// Create new input source from file.
252
- FileInputSource (FILE *input) : input (input) {}
253
-
254
- int next () override { return fgetc (input); }
290
+ FileInputSource (FILE *input) : InputSource (), input (input)
291
+ {
292
+ // TODO make this better
293
+ init ();
294
+ }
255
295
};
256
296
257
297
class BufferInputSource : public InputSource
@@ -260,19 +300,22 @@ class Lexer
260
300
const std::string &buffer;
261
301
size_t offs;
262
302
263
- public:
264
- // Create new input source from file.
265
- BufferInputSource (const std::string &b, size_t offset)
266
- : buffer (b), offs (offset)
267
- {}
268
-
269
- int next () override
303
+ int next_byte () override
270
304
{
271
305
if (offs >= buffer.size ())
272
306
return EOF;
273
307
274
308
return buffer.at (offs++);
275
309
}
310
+
311
+ public:
312
+ // Create new input source from file.
313
+ BufferInputSource (const std::string &b, size_t offset)
314
+ : InputSource (), buffer (b), offs (offset)
315
+ {
316
+ // TODO make this better
317
+ init ();
318
+ }
276
319
};
277
320
278
321
// The input source for the lexer.
0 commit comments