Skip to content

Commit

Permalink
Реализовал простой lexer.
Browse files Browse the repository at this point in the history
  • Loading branch information
sergey-raevskiy committed Sep 2, 2014
1 parent 536c5cb commit 0256199
Show file tree
Hide file tree
Showing 2 changed files with 241 additions and 3 deletions.
190 changes: 188 additions & 2 deletions dwarf-cs/dwarf.core/lang/Lexer.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,198 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.Linq;

namespace dwarf.core.lang
{
/// <summary>
/// A lexical rule: decides, one character at a time, whether the input can
/// still belong to a token of its kind, and builds that token from the
/// accepted text.
/// </summary>
interface ITokenRule
{
/// <summary>
/// Tests whether <paramref name="c"/> is acceptable at position
/// <paramref name="pos"/> of a candidate token.
/// </summary>
/// <param name="c">The character being examined.</param>
/// <param name="pos">
/// Zero-based index of <paramref name="c"/> within the candidate token
/// (the matcher passes the number of characters accepted so far).
/// </param>
/// <returns><c>true</c> if the character extends the match.</returns>
bool Check(char c, int pos);
/// <summary>
/// Creates the token for the text this rule accepted.
/// </summary>
/// <param name="src">The accumulated characters that passed <see cref="Check"/>.</param>
/// <returns>The token representing <paramref name="src"/>.</returns>
Token CreateToken(string src);
}

/// <summary>
/// Rule for identifiers: a letter followed by any number of letters or digits.
/// </summary>
internal class IdentifierRule : ITokenRule
{
    /// <summary>
    /// Accepts a letter in the first position and a letter or digit anywhere else.
    /// </summary>
    /// <param name="c">The character being examined.</param>
    /// <param name="pos">Zero-based index of the character within the candidate token.</param>
    /// <returns><c>true</c> if the character may appear at that position of an identifier.</returns>
    public bool Check(char c, int pos)
    {
        if (pos == 0)
        {
            // An identifier may not start with a digit.
            return char.IsLetter(c);
        }

        return char.IsLetterOrDigit(c);
    }

    /// <summary>
    /// Wraps the matched text in an <see cref="IdentifierToken"/>.
    /// </summary>
    /// <param name="src">The identifier text.</param>
    /// <returns>A new identifier token.</returns>
    public Token CreateToken(string src)
    {
        Token identifier = new IdentifierToken(src);
        return identifier;
    }
}

/// <summary>
/// Rule that matches one fixed piece of text (a keyword or an operator).
/// </summary>
internal class KeywordRule : ITokenRule
{
    private readonly string keyword;

    /// <summary>
    /// Creates a rule for the given literal text.
    /// </summary>
    /// <param name="keyword">The exact text this rule recognises.</param>
    /// <exception cref="ArgumentNullException"><paramref name="keyword"/> is null.</exception>
    public KeywordRule(string keyword)
    {
        if (keyword == null)
            throw new ArgumentNullException("keyword");

        this.keyword = keyword;
    }

    /// <summary>
    /// Accepts <paramref name="c"/> only if it equals the keyword's character
    /// at index <paramref name="pos"/>.
    /// </summary>
    /// <param name="c">The character being examined.</param>
    /// <param name="pos">Zero-based index of the character within the candidate token.</param>
    /// <returns><c>true</c> if the keyword has that character at that index.</returns>
    public bool Check(char c, int pos)
    {
        return pos < keyword.Length && keyword[pos] == c;
    }

    /// <summary>
    /// Creates a <see cref="KeywordToken"/> when the whole keyword was matched.
    /// </summary>
    /// <param name="src">The text accepted by <see cref="Check"/>.</param>
    /// <returns>
    /// A keyword token, or <c>null</c> when <paramref name="src"/> is only a
    /// prefix of the keyword (for example "th" for "then").
    /// </returns>
    public Token CreateToken(string src)
    {
        // Check() accepts any prefix of the keyword, so a partial match must
        // not be turned into a keyword token.
        if (!String.Equals(src, keyword, StringComparison.Ordinal))
            return null;

        return new KeywordToken(src);
    }
}

/// <summary>
/// Rule for unsigned integer constants made of the ASCII digits 0-9.
/// </summary>
internal class NumberRule : ITokenRule
{
    /// <summary>
    /// Accepts an ASCII decimal digit at any position.
    /// </summary>
    /// <param name="c">The character being examined.</param>
    /// <param name="pos">Zero-based index of the character within the candidate token (unused).</param>
    /// <returns><c>true</c> if <paramref name="c"/> is between '0' and '9'.</returns>
    public bool Check(char c, int pos)
    {
        // A leading sign is deliberately not accepted: a test of the form
        // (c == '-' && c == '+') can never be true, and "+" / "-" are lexed as
        // operators by the keyword rules, so "a-1" must stay three tokens.
        //
        // Only ASCII digits are allowed because Char.IsNumber also accepts
        // characters such as superscripts and fractions, which long.Parse
        // rejects with a FormatException.
        return c >= '0' && c <= '9';
    }

    /// <summary>
    /// Parses the matched digits into a <see cref="ConstantToken"/>.
    /// </summary>
    /// <param name="src">A non-empty run of ASCII digits.</param>
    /// <returns>A constant token holding the parsed value.</returns>
    /// <exception cref="OverflowException">The digits do not fit into a <see cref="long"/>.</exception>
    public Token CreateToken(string src)
    {
        // Parse culture-invariantly: source code must lex the same way on
        // every machine regardless of regional settings.
        long value = long.Parse(
            src,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture);

        return new ConstantToken(value);
    }
}

/// <summary>
/// Rule that matches a run of whitespace characters.
/// </summary>
internal class WhitespaceRule : ITokenRule
{
    /// <summary>
    /// Accepts any whitespace character, regardless of position.
    /// </summary>
    /// <param name="c">The character being examined.</param>
    /// <param name="pos">Zero-based index of the character within the candidate token (unused).</param>
    /// <returns><c>true</c> if <paramref name="c"/> is whitespace.</returns>
    public bool Check(char c, int pos)
    {
        bool isBlank = char.IsWhiteSpace(c);
        return isBlank;
    }

    /// <summary>
    /// Produces a <see cref="WhitespaceToken"/>; the matched text itself is discarded.
    /// </summary>
    /// <param name="src">The matched whitespace (ignored).</param>
    /// <returns>A new whitespace token.</returns>
    public Token CreateToken(string src)
    {
        Token blank = new WhitespaceToken();
        return blank;
    }
}

/// <summary>
/// Drives a single <see cref="ITokenRule"/> over the input one character at a
/// time and remembers how much of the input the rule accepted.
/// </summary>
internal class Matcher
{
    private readonly ITokenRule rule;
    private bool stop;

    // Initialised here so Update() is safe even before the first Reset().
    private string matched = String.Empty;

    /// <summary>
    /// The token built when the matcher stopped, or <c>null</c> if it has not
    /// stopped yet, accepted nothing, or the rule declined to create a token.
    /// </summary>
    public Token Token { get; private set; }

    /// <summary>True once the rule has rejected a character or <see cref="Finish"/> was called.</summary>
    public bool Stopped
    {
        get { return stop; }
    }

    /// <summary>Number of characters accepted since the last <see cref="Reset"/>.</summary>
    public int Length
    {
        get { return matched.Length; }
    }

    /// <summary>Creates a matcher for the given rule.</summary>
    /// <param name="rule">The rule that decides which characters are accepted.</param>
    public Matcher(ITokenRule rule)
    {
        this.rule = rule;
    }

    /// <summary>Clears all state so matching can start at a new position.</summary>
    public void Reset()
    {
        stop = false;
        matched = String.Empty;
        Token = null;
    }

    /// <summary>
    /// Offers the next input character to the rule. The first rejected
    /// character stops the matcher; later calls are ignored until <see cref="Reset"/>.
    /// </summary>
    /// <param name="c">The next input character.</param>
    public void Update(char c)
    {
        if (stop)
            return;

        if (rule.Check(c, matched.Length))
        {
            matched += c;
        }
        else
        {
            Finish();
        }
    }

    /// <summary>
    /// Stops the matcher (for example at end of input) and, if anything was
    /// accepted, asks the rule to create the token. Does nothing if already stopped.
    /// </summary>
    public void Finish()
    {
        if (stop)
            return;

        stop = true;

        if (matched.Length > 0)
        {
            Token = rule.CreateToken(matched);
        }
    }
}

/// <summary>
/// Splits source text into tokens using a fixed, ordered set of rules.
/// </summary>
public class Lexer
{
    // Order matters: when two rules accept the same number of characters the
    // earlier one wins, so keywords take precedence over identifiers.
    private static readonly ITokenRule[] Rules =
    {
        new WhitespaceRule(),

        new KeywordRule("if"),
        new KeywordRule("then"),
        new KeywordRule("else"),

        new KeywordRule("+"),
        new KeywordRule("-"),
        new KeywordRule("*"),
        new KeywordRule("/"),

        new KeywordRule("while"),

        new KeywordRule("{"),
        new KeywordRule("}"),

        new KeywordRule("("),
        new KeywordRule(")"),

        new KeywordRule("=="),

        new KeywordRule(";"),

        new KeywordRule(":="),

        new IdentifierRule(),
        new NumberRule(),
    };

    /// <summary>
    /// Lazily tokenizes <paramref name="source"/>.
    /// </summary>
    /// <remarks>
    /// At each position every rule is run as far as it will go and the rule
    /// that accepted the most characters produces the token (ties go to the
    /// rule listed first). A character that no rule turns into a token is
    /// skipped and lexing resumes with the next character.
    /// </remarks>
    /// <param name="source">The text to tokenize; null or empty yields no tokens.</param>
    /// <returns>The tokens in source order.</returns>
    public IEnumerable<Token> Tokenize(string source)
    {
        if (String.IsNullOrEmpty(source))
            yield break;

        var matchers = Rules.Select(rule => new Matcher(rule)).ToArray();
        var start = 0;

        while (start < source.Length)
        {
            foreach (var matcher in matchers)
            {
                matcher.Reset();
            }

            // Feed characters until every rule has rejected one or the input ends.
            for (var pos = start; pos < source.Length && matchers.Any(m => !m.Stopped); pos++)
            {
                foreach (var matcher in matchers)
                {
                    matcher.Update(source[pos]);
                }
            }

            // Flush matchers that were still accepting when the input ran out.
            foreach (var matcher in matchers)
            {
                matcher.Finish();
            }

            // Longest match wins; the strict '>' keeps the earliest rule on ties.
            Matcher best = null;
            foreach (var matcher in matchers)
            {
                if (matcher.Token != null && (best == null || matcher.Length > best.Length))
                {
                    best = matcher;
                }
            }

            if (best == null)
            {
                // No rule produced a token here: skip this character so the
                // remainder of the input is still tokenized.
                start++;
                continue;
            }

            start += best.Length;
            yield return best.Token;
        }
    }
}
}
54 changes: 53 additions & 1 deletion dwarf-cs/dwarf.core/lang/Tokens.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,58 @@
{
/// <summary>
/// Common base class of every token kind; it carries no data of its own.
/// </summary>
public class Token
{

}

/// <summary>
/// Token holding the text of a matched keyword or operator.
/// </summary>
public class KeywordToken : Token
{
    private readonly string _text;

    /// <summary>Creates a token for the given keyword text.</summary>
    /// <param name="keyword">The keyword text to store.</param>
    public KeywordToken(string keyword)
    {
        _text = keyword;
    }

    /// <summary>Returns the stored keyword text unchanged.</summary>
    public override string ToString()
    {
        return _text;
    }
}

/// <summary>
/// Token holding the name of an identifier.
/// </summary>
public class IdentifierToken : Token
{
    private readonly string _name;

    /// <summary>Creates a token for the given identifier name.</summary>
    /// <param name="name">The identifier name to store.</param>
    public IdentifierToken(string name)
    {
        _name = name;
    }

    /// <summary>Returns the name wrapped in square brackets.</summary>
    public override string ToString()
    {
        var text = string.Format("[{0}]", _name);
        return text;
    }
}

/// <summary>
/// Token holding an integer constant.
/// </summary>
public class ConstantToken : Token
{
    private readonly long _value;

    /// <summary>Creates a token for the given constant.</summary>
    /// <param name="value">The numeric value to store.</param>
    public ConstantToken(long value)
    {
        _value = value;
    }

    /// <summary>Returns the value wrapped in angle brackets.</summary>
    public override string ToString()
    {
        var text = string.Format("<{0}>", _value);
        return text;
    }
}

/// <summary>
/// Token standing for a run of whitespace; it carries no data.
/// </summary>
public class WhitespaceToken : Token
{
    // Every whitespace token prints as a single space.
    private const string Text = " ";

    /// <summary>Returns a single space character.</summary>
    public override string ToString()
    {
        return Text;
    }
}
}

0 comments on commit 0256199

Please sign in to comment.