diff --git a/dwarf/dwarf-compiler/ast_constructors.c b/dwarf/dwarf-compiler/ast_constructors.c
index 593e577..4a8e169 100755
--- a/dwarf/dwarf-compiler/ast_constructors.c
+++ b/dwarf/dwarf-compiler/ast_constructors.c
@@ -37,6 +37,28 @@ ast_t* ast_div( ast_t* left, ast_t* right ) {
     return ast_binop( left, right, AST_DIVIDE );
 }
 
+/* Comparison node constructors: thin wrappers over ast_binop. */
+ast_t* ast_lt( ast_t* left, ast_t* right ) {
+    return ast_binop( left, right, AST_LT );
+}
+
+ast_t* ast_le( ast_t* left, ast_t* right ) {
+    return ast_binop( left, right, AST_LE );
+}
+
+ast_t* ast_eq( ast_t* left, ast_t* right ) {
+    return ast_binop( left, right, AST_EQ );
+}
+
+ast_t* ast_ge( ast_t* left, ast_t* right ) {
+    return ast_binop( left, right, AST_GE );
+}
+
+ast_t* ast_gt( ast_t* left, ast_t* right ) {
+    return ast_binop( left, right, AST_GT );
+}
+
+
 
 
 ast_t* ast_seq( ast_t* left, ast_t* right) {
diff --git a/dwarf/dwarf-compiler/ast_helpers.c b/dwarf/dwarf-compiler/ast_helpers.c
index faf8c33..58b23a2 100755
--- a/dwarf/dwarf-compiler/ast_helpers.c
+++ b/dwarf/dwarf-compiler/ast_helpers.c
@@ -27,6 +27,11 @@ void ast_visit( ast_t* node, void (before)( ast_t* node ), void (after)( ast_t*
     case AST_MINUS:
     case AST_MULT:
     case AST_DIVIDE:
+    case AST_LT:
+    case AST_LE:
+    case AST_EQ:
+    case AST_GE:
+    case AST_GT:
         ast_visit( node->attributes.as_binop.left, before, after );
         ast_visit( node->attributes.as_binop.right, before, after );
         break;
@@ -108,6 +113,41 @@ void ast_print( ast_t* node, FILE* out ) {
         ast_print( node->attributes.as_binop.right, out );
         fprintf( out, " )" );
         break;
+    case AST_LT:
+        fprintf( out, "( " );
+        ast_print( node->attributes.as_binop.left, out );
+        fprintf( out, " < " );
+        ast_print( node->attributes.as_binop.right, out );
+        fprintf( out, " )" );
+        break;
+    case AST_LE:
+        fprintf( out, "( " );
+        ast_print( node->attributes.as_binop.left, out );
+        fprintf( out, " <= " );
+        ast_print( node->attributes.as_binop.right, out );
+        fprintf( out, " )" );
+        break;
+    case AST_EQ:
+        fprintf( out, "( " );
+        ast_print( node->attributes.as_binop.left, out );
+        fprintf( out, " == " );
+        ast_print( node->attributes.as_binop.right, out );
+        fprintf( out, " )" );
+        break;
+    case AST_GE:
+        fprintf( out, "( " );
+        ast_print( node->attributes.as_binop.left, out );
+        fprintf( out, " >= " );
+        ast_print( node->attributes.as_binop.right, out );
+        fprintf( out, " )" );
+        break;
+    case AST_GT:
+        fprintf( out, "( " );
+        ast_print( node->attributes.as_binop.left, out );
+        fprintf( out, " > " );
+        ast_print( node->attributes.as_binop.right, out );
+        fprintf( out, " )" );
+        break;
     case AST_SEQ:
         ast_print( node->attributes.as_seq.first, out );
         fprintf( out, " ; " );
@@ -131,4 +171,3 @@ void ast_print( ast_t* node, FILE* out ) {
         break;
     }
 }
-    
\ No newline at end of file
diff --git a/dwarf/dwarf-compiler/bytecode.c b/dwarf/dwarf-compiler/bytecode.c
index 0367981..e3f45b0 100755
--- a/dwarf/dwarf-compiler/bytecode.c
+++ b/dwarf/dwarf-compiler/bytecode.c
@@ -105,11 +105,11 @@ void bytecode_print( bytecode_t* bytecode ) {
 
     case BC_IPUSHREG:
     case BC_IPOPREG:
-        printf( " %d\n", bytecode-> params.reg );
+        printf( " %d", bytecode-> params.reg );
         break;
     case BC_JMP:
     case BC_JZ:
-        printf( " %d\n", bytecode-> params.offset );
+        printf( " [%d]", bytecode-> params.offset );
         break;
     default:
         break;
diff --git a/dwarf/dwarf-compiler/bytecode.h b/dwarf/dwarf-compiler/bytecode.h
index 8971e38..1bd832e 100755
--- a/dwarf/dwarf-compiler/bytecode.h
+++ b/dwarf/dwarf-compiler/bytecode.h
@@ -10,6 +10,7 @@ typedef enum {
     BC_ISUB,
     BC_IMUL,
     BC_IDIV,
+    BC_DUP,
     BC_IPUSHC,
     BC_IPUSHREG,
     BC_IPOPREG,
@@ -27,6 +28,7 @@ static char* bytecode_mnemonics[] = {
     "ISUB",
     "IMUL",
     "IDIV",
+    "DUP",
     "IPUSHC",
     "IPUSHREG",
     "IPOPREG",
diff --git a/dwarf/dwarf-compiler/codegen.c b/dwarf/dwarf-compiler/codegen.c
index 23d98f5..01fc7bc 100755
--- a/dwarf/dwarf-compiler/codegen.c
+++ b/dwarf/dwarf-compiler/codegen.c
@@ -77,6 +77,73 @@ static size_t generate_div( ast_t* node, bytecode_builder_t* bc, generator_conte
     return first + bytecode_size[ BC_IDIV ] + second;
 }
 
+/* Comparison operators. Each one generates both operands and emits BC_ICMP;
+   lt/le generate the right operand first, gt/ge/eq the left one.
+   lt, gt: leave the raw ICMP result c on the stack.
+   le, ge: leave c + 1.
+   eq:     leaves c * c + (-1).
+   NOTE(review): presumably ICMP pushes -1/0/1 and JZ branches on zero; if so
+   lt/gt are non-zero for any unequal operands -- confirm against the VM. */
+static size_t generate_lt( ast_t* node, bytecode_builder_t* bc, generator_context_t* ctx ) {
+    size_t first, second;
+    first = generate( node->attributes.as_binop.right, bc, ctx );
+    second = generate( node->attributes.as_binop.left, bc, ctx );
+    emit_bytecode( bc, BC_ICMP );
+    return first + bytecode_size[ BC_ICMP ] + second;
+}
+
+static size_t generate_gt( ast_t* node, bytecode_builder_t* bc, generator_context_t* ctx ) {
+    size_t first, second;
+    first = generate( node->attributes.as_binop.left, bc, ctx );
+    second = generate( node->attributes.as_binop.right, bc, ctx );
+    emit_bytecode( bc, BC_ICMP );
+    return first + bytecode_size[ BC_ICMP ] + second;
+}
+
+static size_t generate_eq( ast_t* node, bytecode_builder_t* bc, generator_context_t* ctx ) {
+    size_t first, second;
+    first = generate( node->attributes.as_binop.left, bc, ctx );
+    second = generate( node->attributes.as_binop.right, bc, ctx );
+    emit_bytecode( bc, BC_ICMP );
+    emit_bytecode( bc, BC_DUP );
+    emit_bytecode( bc, BC_IMUL );
+    emit_ipushc( bc, -1 );
+    emit_bytecode( bc, BC_IADD ); /* emit the add itself, not its opcode number as a constant */
+    return first + second
+        + bytecode_size[ BC_ICMP ]
+        + bytecode_size[ BC_DUP ]
+        + bytecode_size[ BC_IMUL ]
+        + bytecode_size[ BC_IPUSHC ]
+        + bytecode_size[ BC_IADD ] ;
+}
+
+static size_t generate_le( ast_t* node, bytecode_builder_t* bc, generator_context_t* ctx ) {
+    size_t first, second;
+    first = generate( node->attributes.as_binop.right, bc, ctx );
+    second = generate( node->attributes.as_binop.left, bc, ctx );
+    emit_bytecode( bc, BC_ICMP );
+    emit_ipushc( bc, 1 );
+    emit_bytecode( bc, BC_IADD ); /* emit the add itself, not its opcode number as a constant */
+    return first + second
+        + bytecode_size[ BC_ICMP ]
+        + bytecode_size[ BC_IPUSHC ]
+        + bytecode_size[ BC_IADD ] ;
+}
+
+
+static size_t generate_ge( ast_t* node, bytecode_builder_t* bc, generator_context_t* ctx ) {
+    size_t first, second;
+    first = generate( node->attributes.as_binop.left, bc, ctx );
+    second = generate( node->attributes.as_binop.right, bc, ctx );
+    emit_bytecode( bc, BC_ICMP );
+    emit_ipushc( bc, 1 );
+    emit_bytecode( bc, BC_IADD ); /* emit the add itself, not its opcode number as a constant */
+    return first + second
+        + bytecode_size[ BC_ICMP ]
+        + bytecode_size[ BC_IPUSHC ]
+        + bytecode_size[ BC_IADD ] ;
+}
+
 static size_t generate_seq( ast_t* node, bytecode_builder_t* bc, generator_context_t* ctx ) {
     size_t first, second;
     first = generate( node->attributes.as_seq.first, bc, ctx );
@@ -114,14 +181,14 @@ static size_t generate_if( ast_t* node, bytecode_builder_t* bc, generator_contex
     end = emit_jmp( bc );
     no_size = generate( node-> attributes.as_if.no, bc, ctx );
 
-    *no = yes_size + bytecode_size[ BC_JMP ];
-    *end = no_size;
+    *no = yes_size;
+    *end = no_size + bytecode_size[ BC_JMP ];
 
     return cond_size
         + bytecode_size[ BC_JZ ]
-        + yes_size 
+        + yes_size
         + bytecode_size[ BC_JMP ]
-        + no_size; 
+        + no_size;
 }
 
 static size_t generate_while( ast_t* node, bytecode_builder_t* bc, generator_context_t* ctx ) {
@@ -139,7 +206,7 @@ static size_t generate_while( ast_t* node, bytecode_builder_t* bc, generator_con
 
     return cond_size
         + bytecode_size[ BC_JZ ]
-        + body_size 
+        + body_size
         + bytecode_size[ BC_JMP ];
 }
 
@@ -154,6 +221,11 @@ static size_t generate( ast_t* node, bytecode_builder_t* bc, generator_context_t
     case AST_MINUS: return generate_minus( node, bc, ctx );
     case AST_MULT: return generate_mult( node, bc, ctx );
     case AST_DIVIDE: return generate_div( node, bc, ctx );
+    case AST_LT: return generate_lt( node, bc, ctx );
+    case AST_LE: return generate_le( node, bc, ctx );
+    case AST_EQ: return generate_eq( node, bc, ctx );
+    case AST_GE: return generate_ge( node, bc, ctx );
+    case AST_GT: return generate_gt( node, bc, ctx );
     case AST_SEQ: return generate_seq( node, bc, ctx );
     case AST_IDENTIFIER: return generate_identifier( node, bc, ctx );
     case AST_PRINT: return generate_print( node, bc, ctx );
@@ -171,6 +243,7 @@ bytecode_builder_t generate_from_ast( ast_t* root ) {
 
     ctx = generator_context_create();
     generate( root, &bc, ctx );
+    emit_bytecode( &bc, BC_HALT );
     generator_context_destroy( ctx );
     return bc;
 }
diff --git a/dwarf/dwarf-compiler/io.c b/dwarf/dwarf-compiler/io.c
new file mode 100755
index 0000000..5c20951
--- /dev/null
+++ b/dwarf/dwarf-compiler/io.c
@@ -0,0 +1,20 @@
+#include <stdlib.h>
+#include "io.h"
+
+#define MAX_FILE_SIZE 65535
+
+/* Reads at most MAX_FILE_SIZE bytes from `file` into a freshly allocated,
+   NUL-terminated buffer that the caller must free(). Longer input is
+   truncated. Returns NULL when the buffer cannot be allocated. */
+char* read_file( FILE* file ) {
+    char* str;
+    size_t read_size;
+
+    str = (char*) malloc( MAX_FILE_SIZE + 1 );
+    if ( !str ) return NULL;
+    read_size = fread( str, 1, MAX_FILE_SIZE, file );
+    str[ read_size ] = 0;
+    return str;
+}
+
+void write_bytecode( bytecode_t* bytecode );
diff --git a/dwarf/dwarf-compiler/io.h b/dwarf/dwarf-compiler/io.h
new file mode 100755
index 0000000..1660cc9
--- /dev/null
+++ b/dwarf/dwarf-compiler/io.h
@@ -0,0 +1,13 @@
+#ifndef FILE_IO
+#define FILE_IO
+
+#include <stdio.h>
+#include "bytecode.h"
+
+/* Returns a malloc'ed, NUL-terminated copy of the stream contents
+   (NULL on allocation failure); the caller frees it. */
+char* read_file( FILE* file );
+void write_bytecode( bytecode_t* bytecode );
+
+
+#endif
diff --git a/dwarf/dwarf-compiler/main.c b/dwarf/dwarf-compiler/main.c
index ac89b84..22d9efd 100755
--- a/dwarf/dwarf-compiler/main.c
+++ b/dwarf/dwarf-compiler/main.c
@@ -4,22 +4,41 @@
 #include "AST.h"
 #include "parser.h"
 #include "codegen.h"
-
+#include "io.h"
+static void token_item_print( token_list_t* item ) {
+    token_print( & ( item->token ) );
+}
 
 int main( int argc, char** argv )
 {
     token_list_t* list;
     bytecode_builder_t bc;
     ast_t* tree;
-    char* program = "x := 4; if (x) then y:=2 5 else z := 5" ;
+    char* program;
+    FILE* input;
+
+    input = fopen( "program.txt", "r" );
+    if (!input) return EXIT_FAILURE;
+
+    program = read_file( input );
+    fclose( input );
+    if (!program) return EXIT_FAILURE;
+
 
     list = tokenize( program );
+    puts("Tokens:");
+    token_list_foreach( list, token_item_print );
+    puts("");
     tree = parse( list, program );
 
     bc = generate_from_ast( tree );
     puts("");
     bytecode_prettyprint( bc.first );
+
+    free( program );
 
     token_list_free( list );
-    ast_free( tree ); 
+    ast_free( tree );
+    //free bytecode?
+
 }
diff --git a/dwarf/dwarf-compiler/parser.c b/dwarf/dwarf-compiler/parser.c
index afb9f0b..c7c29f9 100755
--- a/dwarf/dwarf-compiler/parser.c
+++ b/dwarf/dwarf-compiler/parser.c
@@ -14,8 +14,8 @@ statement := "{" statements "}" | assignment | if | while | print
 assignment := IDENT "=" expr
 if := "if" "(" expr ")" "then" statement "else" statement
 while := "while" "(" expr ")" statement
-
-expr := expr1 "+" expr | expr1 "-" expr | expr1
+expr := expr0 "<" expr0 | expr0 "<=" expr0 | expr0 "==" expr0 | expr0 ">" expr0 | expr0 ">=" expr0 | expr0
+expr0 := expr1 "+" expr | expr1 "-" expr | expr1
 expr1 := atom "*" expr1 | atom "/" expr1 | atom
 atom := "(" expr ")" | NUMBER
 
@@ -39,6 +39,7 @@ static ast_t* assignment( token_list_t** stream, const char* const code );
 static ast_t* if_then_else( token_list_t** stream, const char* const code );
 static ast_t* while_cond( token_list_t** stream, const char* const code );
 static ast_t* expression( token_list_t** stream, const char* const code );
+static ast_t* expression0( token_list_t** stream, const char* const code );
 static ast_t* expression1( token_list_t** stream, const char* const code );
 static ast_t* atom( token_list_t** stream, const char* const code );
 
@@ -66,9 +67,32 @@ static token_t* accept( token_list_t** stream, token_type_t token_type )
     else
         return NULL;
 }
 
 /* Recursive-descent parser */
 static ast_t* expression( token_list_t** stream, const char* const code ) {
+    ast_t* lhs, *rhs;
+    lhs = expression0( stream, code );
+    error_if_null( lhs, stream, code );
+    if ( accept( stream, TOK_LT) ) {
+        rhs = expression0( stream, code ); error_if_null( rhs, stream, code );
+        return ast_lt( lhs, rhs );
+    } else if ( accept( stream, TOK_LE) ) {
+        rhs = expression0( stream, code ); error_if_null( rhs, stream, code );
+        return ast_le( lhs, rhs );
+    } else if ( accept( stream, TOK_EQ) ) {
+        rhs = expression0( stream, code ); error_if_null( rhs, stream, code );
+        return ast_eq( lhs, rhs );
+    } else if ( accept( stream, TOK_GE) ) {
+        rhs = expression0( stream, code ); error_if_null( rhs, stream, code );
+        return ast_ge( lhs, rhs );
+    } else if ( accept( stream, TOK_GT) ) {
+        rhs = expression0( stream, code ); error_if_null( rhs, stream, code );
+        return ast_gt( lhs, rhs );
+    }
+    return lhs;
+}
+
+static ast_t* expression0( token_list_t** stream, const char* const code ) {
     ast_t* lhs, *rhs;
     lhs = expression1( stream, code );
     error_if_null( lhs, stream, code );
diff --git a/dwarf/dwarf-compiler/tokens.h b/dwarf/dwarf-compiler/tokens.h
index 3eadff1..a761349 100755
--- a/dwarf/dwarf-compiler/tokens.h
+++ b/dwarf/dwarf-compiler/tokens.h
@@ -23,6 +23,12 @@ typedef enum {
     TOK_MULT,
     TOK_DIVIDE,
 
+    TOK_LE,
+    TOK_LT,
+    TOK_EQ,
+    TOK_GE,
+    TOK_GT,
+
     TOK_NUM,
     TOK_IDENT,
     TOK_EOF,
@@ -49,6 +55,12 @@ static char* token_strings[] = {
     "*",
     "/",
 
+    "<=",
+    "<",
+    "==",
+    ">=",
+    ">",
+
     "",
     "",
     ""