-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.c
1016 lines (891 loc) · 26.1 KB
/
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include <ctype.h>
#include <stdio.h>
#define DS_IMPLEMENTATION
#include "ds.h"
#include <string.h>
// ##################################################################
// # #
// # jLang & Compiler #
// # #
// ##################################################################
// Token kinds produced by the lexer.
enum token_type
{
    IDENT,     // identifier (letters, digits, '_'; not starting with a digit)
    LABEL,     // ':' followed by a name
    INT,       // run of decimal digits
    INPUT,     // keyword "input"
    OUTPUT,    // keyword "output"
    GOTO,      // keyword "goto"
    IF,        // keyword "if"
    THEN,      // keyword "then"
    EQUAL,     // '='
    PLUS,      // '+'
    LESS_THAN, // '<'
    INVALID,   // any character the lexer does not recognise
    END        // end of input
};

// A single token of the input stream.
struct token
{
    enum token_type type;
    // Text of the token for IDENT, LABEL, INT and INVALID; NULL for every
    // other kind (keywords, operators and END carry no text).
    char *value;
};

// Returns a static, human-readable name for `type`.
// Values outside the enumeration yield "unknown" instead of running off the
// end of the function (which would be undefined behaviour once the caller
// uses the result).
const char *show_token_type(enum token_type type)
{
    switch (type)
    {
    case IDENT:
        return "ident";
    case LABEL:
        return "label";
    case INT:
        return "int";
    case INPUT:
        return "input";
    case OUTPUT:
        return "output";
    case GOTO:
        return "goto";
    case IF:
        return "if";
    case THEN:
        return "then";
    case EQUAL:
        return "equal";
    case PLUS:
        return "plus";
    case LESS_THAN:
        return "less_than";
    case INVALID:
        return "invalid";
    case END:
        return "end";
    }
    // Deliberately placed after the switch (not as `default:`) so the
    // compiler can still warn when a new enumerator is left unhandled.
    return "unknown";
}
// Writes a token to stdout as "<type>" or "<type>(<value>)" followed by a
// newline. The parenthesised part is printed only when the token has text.
void print_token(struct token tok)
{
    fputs(show_token_type(tok.type), stdout);
    if (tok.value)
    {
        printf("(%s)", tok.value);
    }
    putchar('\n');
}
// Lexer state: a cursor over the input character buffer that gets split into tokens.
struct lexer
{
char *buffer; // input text being tokenized
unsigned int buffer_len; // number of characters available in buffer
unsigned int pos; // index of the character currently held in ch
unsigned int read_pos; // index of the next character to be fetched
char ch; // current character; holds EOF (narrowed to char) once read_pos reached buffer_len
};
// Returns the character at read_pos without consuming it, or EOF once
// read_pos has reached the end of the buffer.
// NOTE: the result is a plain char, so EOF is narrowed to char on return.
static char lexer_peek_char(struct lexer *l)
{
    return (l->read_pos < l->buffer_len) ? l->buffer[l->read_pos] : EOF;
}
// Consumes one character: makes the character at read_pos the current one
// (l->ch, with l->pos pointing at it), moves read_pos forward by one and
// returns the new current character.
static char lexer_read_char(struct lexer *l)
{
    const char next = lexer_peek_char(l);

    l->pos = l->read_pos++;
    l->ch = next;
    return next;
}
// Advances the lexer until the current character is not whitespace, leaving
// it positioned on the first character of the next token (or on EOF).
static void skip_whitespaces(struct lexer *l)
{
    // <ctype.h> classifiers are only defined for EOF and values representable
    // as unsigned char; a plain char holding a byte >= 0x80 may be negative,
    // so stop on EOF explicitly and cast everything else.
    while (l->ch != EOF && isspace((unsigned char)l->ch))
    {
        lexer_read_char(l);
    }
}
// Function that initializes a new lexer instance
static void lexer_init(struct lexer *l, char *buffer, unsigned int buffer_len)
{
l->buffer = buffer;
l->buffer_len = buffer_len;
l->pos = 0;
l->read_pos = 0;
l->ch = 0;
// read first character
lexer_read_char(l);
}
// Function that handles core lexer logic
static struct token lexer_next_token(struct lexer *l)
{
// advance to first non-whitespace character
skip_whitespaces(l);
// Return END token
if (l->ch == EOF)
{
lexer_read_char(l);
return (struct token){.type = END, .value = NULL};
}
// Return EQUAL token
else if (l->ch == '=')
{
lexer_read_char(l);
return (struct token){.type = EQUAL, .value = NULL};
}
// Return LESS_THAN token
else if (l->ch == '<')
{
lexer_read_char(l);
return (struct token){.type = LESS_THAN, .value = NULL};
}
// Return PLUS token
else if (l->ch == '+')
{
lexer_read_char(l);
return (struct token){.type = PLUS, .value = NULL};
}
// : indicates we are dealing with a series of alphanumeric characters (a label)
else if (l->ch == ':')
{
lexer_read_char(l);
// create slice struct that initializes str to point to current_pos, with 0 length
ds_string_slice slice = {.str = l->buffer + l->pos, .len = 0};
// Increment length while we have alphanumeric char or underscore
while (isalnum(l->ch) || l->ch == '_')
{
slice.len += 1;
lexer_read_char(l);
}
char *value = NULL;
// create new owned string using slice and store in value pointer
ds_string_slice_to_owned(&slice, &value);
// create token with label and previosuely determined value
return (struct token){.type = LABEL, .value = value};
}
else if (isdigit(l->ch))
{
// similar logic to :, just return INT token
ds_string_slice slice = {.str = l->buffer + l->pos, .len = 0};
while (isdigit(l->ch))
{
slice.len += 1;
lexer_read_char(l);
}
char *value = NULL;
ds_string_slice_to_owned(&slice, &value);
return (struct token){.type = INT, .value = value};
}
else if (isalnum(l->ch) || l->ch == '_')
{
// similar logic to conditional above
ds_string_slice slice = {.str = l->buffer + l->pos, .len = 0};
while (isalnum(l->ch) || l->ch == '_')
{
slice.len += 1;
lexer_read_char(l);
}
char *value = NULL;
ds_string_slice_to_owned(&slice, &value);
// check if the identfied value is a keyword
// Return input keyword token
if (strcmp(value, "input") == 0)
{
return (struct token){.type = INPUT, .value = NULL};
}
// Return output keyword token
else if (strcmp(value, "output") == 0)
{
return (struct token){.type = OUTPUT, .value = NULL};
}
// Return if keyword token
else if (strcmp(value, "if") == 0)
{
return (struct token){.type = IF, .value = NULL};
}
// Return then keyword token
else if (strcmp(value, "then") == 0)
{
return (struct token){.type = THEN, .value = NULL};
}
// Return goto keyword token
else if (strcmp(value, "goto") == 0)
{
return (struct token){.type = GOTO, .value = NULL};
}
// Return identifier token
else
{
return (struct token){.type = IDENT, .value = value};
}
}
else
{
// return an invalid token for characters that do not fall into conditionals above
ds_string_slice slice = {.str = l->buffer + l->pos, .len = 1};
char *value = NULL;
ds_string_slice_to_owned(&slice, &value);
lexer_read_char(l);
return (struct token){.type = INVALID, .value = value};
}
}
// Tokenizes `length` characters of `buffer` and appends every token,
// including the final END token, to the dynamic array `tokens`.
// Panics if a token cannot be appended; otherwise returns 0.
int lexer_tokenize(char *buffer, unsigned int length,
                   ds_dynamic_array *tokens)
{
    struct lexer lx;
    lexer_init(&lx, buffer, length);

    for (;;)
    {
        struct token current = lexer_next_token(&lx);
        if (ds_dynamic_array_append(tokens, &current) != 0)
        {
            DS_PANIC("Failed to append token to array");
        }
        // END is stored as well, then terminates the scan
        if (current.type == END)
        {
            break;
        }
    }
    return 0;
}
// Defining structures for Abstract Syntax Tree (AST)
// Kinds of term that can appear in an expression, relation or output statement.
enum term_type
{
TERM_INPUT, // the `input` keyword
TERM_INT, // an integer literal
TERM_IDENT // a variable name
};
// AST node for a single term of the jLang language.
struct term_node
{
enum term_type type;
union
{
// Token text for TERM_INT and TERM_IDENT (assigned in parse_term);
// parse_term does not assign it for TERM_INPUT.
char *value;
};
};
// Kinds of expression: a single term, or the sum of two terms.
enum expr_type
{
EXPR_TERM, // one term
EXPR_PLUS, // lhs + rhs
};
// Operand pair shared by binary constructs (addition and the less-than relation).
struct term_binary_node
{
struct term_node lhs; // left-hand operand
struct term_node rhs; // right-hand operand
};
// AST node for an expression; `type` selects which union member is in use.
struct expr_node
{
enum expr_type type;
union
{
struct term_node term; // used when type == EXPR_TERM
struct term_binary_node add; // used when type == EXPR_PLUS
};
};
// Kinds of relational operator; only less-than exists at present.
enum rel_type
{
REL_LESS_THAN, // lhs < rhs
};
// AST node for a relational expression; `type` selects the union member.
struct rel_node
{
enum rel_type type;
union
{
struct term_binary_node less_than; // operands when type == REL_LESS_THAN
};
};
// Kinds of instruction a program can contain.
enum instr_type
{
INSTR_ASSIGN, // <ident> = <expr>
INSTR_IF, // if <rel> then <instr>
INSTR_GOTO, // goto <label>
INSTR_OUTPUT, // output <term>
INSTR_LABEL // a label definition
};
// Payload of an assignment instruction.
struct assign_node
{
char *ident; // name of the variable being assigned (text of the IDENT token)
struct expr_node expr; // expression whose value is stored
};
// Payload of an if instruction: a condition and the instruction it guards.
struct if_node
{
struct rel_node rel; // the condition
struct instr_node *instr; // guarded instruction; allocated with malloc in parse_if
};
// Payload of a goto instruction.
struct goto_node
{
char *label; // name of the target label (text of the LABEL token)
};
// Payload of an output instruction.
struct output_node
{
struct term_node term; // the term whose value is written out
};
// Payload of a label definition.
struct label_node
{
char *label; // label name (text of the LABEL token)
};
// AST node for one instruction; `type` selects which union member is in use.
struct instr_node
{
enum instr_type type;
union
{
struct assign_node assign; // INSTR_ASSIGN
struct if_node if_; // INSTR_IF
struct goto_node goto_; // INSTR_GOTO
struct output_node output; // INSTR_OUTPUT
struct label_node label; // INSTR_LABEL
};
};
// Root of the AST: the program's instructions in source order.
struct program_node
{
ds_dynamic_array instrs; // elements are struct instr_node (see parse_program)
};
// Parser state: the token array produced by the lexer plus a cursor into it.
struct parser
{
ds_dynamic_array tokens; // elements are struct token
unsigned int index; // position of the current token
};
// Initializes a parser over `tokens`, positioned on the first token.
void parser_init(ds_dynamic_array tokens, struct parser *p)
{
    *p = (struct parser){.tokens = tokens, .index = 0};
}
// Copies the token at the parser's current index into *token.
// The result of ds_dynamic_array_get is not checked here.
void parser_current(struct parser *p, struct token *token)
{
ds_dynamic_array_get(&p->tokens, p->index, token);
}
// Moves the parser to the next token by incrementing its index; no bounds check is done here.
void parser_advance(struct parser *p) { p->index++; }
// Parses one term (input, integer literal or identifier) from the current
// token into *term and consumes that token.
// Panics when the current token cannot start a term.
void parse_term(struct parser *p, struct term_node *term)
{
    struct token token;
    parser_current(p, &token);

    switch (token.type)
    {
    case INPUT:
        term->type = TERM_INPUT;
        // `input` carries no text; set the pointer explicitly so the node
        // never holds an indeterminate value when it is copied around.
        term->value = NULL;
        break;
    case INT:
        term->type = TERM_INT;
        term->value = token.value;
        break;
    case IDENT:
        term->type = TERM_IDENT;
        term->value = token.value;
        break;
    default:
        DS_PANIC("Expected a term (input, int or ident) but found %s",
                 show_token_type(token.type));
        break;
    }

    // consume the term's token
    parser_advance(p);
}
// Function to transform single terms and binary expressions into expression nodes
void parse_expr(struct parser *p, struct expr_node *expr)
{
struct token token;
struct term_node lhs, rhs;
parse_term(p, &lhs);
// get current token from term
parser_current(p, &token);
// plus is the only binary expression operator
if (token.type == PLUS)
{
parser_advance(p);
parse_term(p, &rhs);
// create binary expression node
expr->type = EXPR_PLUS;
expr->add.lhs = lhs;
expr->add.rhs = rhs;
}
else
{
// create single expression node
expr->type = EXPR_TERM;
expr->term = lhs;
}
}
// Function to turn relation operation expressions into corresponding nodes
void parse_rel(struct parser *p, struct rel_node *rel)
{
struct token token;
struct term_node lhs, rhs;
parse_term(p, &lhs);
// get current token from term
parser_current(p, &token);
// Conditional to check if < (assuming more relation operators are added in the future)
if (token.type == LESS_THAN)
{
parser_advance(p);
parse_term(p, &rhs);
// create relation operation node
rel->type = REL_LESS_THAN;
rel->less_than.lhs = lhs;
rel->less_than.rhs = rhs;
}
else
{
// Throw error if we get a relation operator that is not <
DS_PANIC("Expected rel (<) found %s", show_token_type(token.type));
}
}
// Function to turn assignment operation expressions into corresponding nodes
void parse_assign(struct parser *p, struct instr_node *instr)
{
struct token token;
// set instruction type to assignment
instr->type = INSTR_ASSIGN;
parser_current(p, &token);
instr->assign.ident = token.value;
// consume the identifier token
parser_advance(p);
parser_current(p, &token);
if (token.type != EQUAL)
{
// throw error if we are given an assignment operator that is not =
DS_PANIC("Expected equal found %s", show_token_type(token.type));
}
// consume the operator (=) token
parser_advance(p);
// parse the expression to get actual assignment
parse_expr(p, &instr->assign.expr);
}
// Forward declaration: parse_if calls parse_instr, which is defined later
// and itself dispatches to parse_if.
void parse_instr(struct parser *p, struct instr_node *instr);

// Parses `if <rel> then <instr>` into *instr as an INSTR_IF node.
// The guarded instruction is stored in a node allocated with malloc and
// referenced from instr->if_.instr. Panics when `then` is missing or the
// allocation fails.
void parse_if(struct parser *p, struct instr_node *instr)
{
    struct token token;

    instr->type = INSTR_IF;
    parser_advance(p); // consume `if`

    // the condition
    parse_rel(p, &instr->if_.rel);

    parser_current(p, &token);
    if (token.type != THEN)
    {
        DS_PANIC("Expected then found %s", show_token_type(token.type));
    }
    parser_advance(p); // consume `then`

    // Size the allocation from the pointer and verify it before use, so a
    // failed malloc is reported instead of being dereferenced in parse_instr.
    instr->if_.instr = malloc(sizeof *instr->if_.instr);
    if (instr->if_.instr == NULL)
    {
        DS_PANIC("Failed to allocate instruction node");
        return;
    }
    parse_instr(p, instr->if_.instr);
}
// Function to parse goto expressions
void parse_goto(struct parser *p, struct instr_node *instr)
{
struct token token;
// Assign instruction type to be goto
instr->type = INSTR_GOTO;
parser_advance(p);
parser_current(p, &token);
if (token.type != LABEL)
{
// if label cannot be processed
DS_PANIC("Expected label found %s", show_token_type(token.type));
}
parser_advance(p);
// set goto label to be value of goto token
instr->goto_.label = token.value;
}
// Parses `output <term>` into *instr as an INSTR_OUTPUT node.
void parse_output(struct parser *p, struct instr_node *instr)
{
    instr->type = INSTR_OUTPUT;
    parser_advance(p); // consume `output`

    // Parse straight into the node: the former unused `token` local and the
    // intermediate term copy are gone.
    parse_term(p, &instr->output.term);
}
// Function to parse labels in code and assign corresponding node values
void parse_label(struct parser *p, struct instr_node *instr)
{
struct token token;
// Assign instruction type to be label
instr->type = INSTR_LABEL;
parser_current(p, &token);
// Set the value of the label name
instr->label.label = token.value;
// consume the label token
parser_advance(p);
}
// Function to parse instructions, given instruction type
void parse_instr(struct parser *p, struct instr_node *instr)
{
struct token token;
// Get current token
parser_current(p, &token);
// Call different parser functions based on token type
if (token.type == IDENT)
{
parse_assign(p, instr);
}
else if (token.type == IF)
{
parse_if(p, instr);
}
else if (token.type == GOTO)
{
parse_goto(p, instr);
}
else if (token.type == OUTPUT)
{
parse_output(p, instr);
}
else if (token.type == LABEL)
{
parse_label(p, instr);
}
else
{
// throw if we have a bad token type
DS_PANIC("unexpected token %s", show_token_type(token.type));
}
}
// Looks `ident` up in `variables` (an array of char * names) and returns the
// index of the first entry that compares equal, or -1 when there is none.
int find_variable(ds_dynamic_array *variables, char *ident)
{
    unsigned int slot = 0;
    while (slot < variables->count)
    {
        char *name = NULL;
        ds_dynamic_array_get(variables, slot, &name);
        if (!strcmp(ident, name))
        {
            return slot;
        }
        slot++;
    }
    return -1;
}
// Function that reads program and builds AST
void parse_program(struct parser *p, struct program_node *program)
{
ds_dynamic_array_init(&program->instrs, sizeof(struct instr_node));
struct token token;
do
{
struct instr_node instr;
parse_instr(p, &instr);
// add parsed unsrtuction to the end of instruction array
ds_dynamic_array_append(&program->instrs, &instr);
// get next token
parser_current(p, &token);
} while (token.type != END);
}
// Prints to stdout the assembly that loads the value of `term` into rax.
//  - input:      reads into `line`, then calls strlen and parse_uint
//  - identifier: loads the variable's slot at [rbp - (index * 8 + 8)]
//  - integer:    moves the literal text into rax
void term_asm(struct term_node *term, ds_dynamic_array *variables)
{
    if (term->type == TERM_INPUT)
    {
        fputs(" read 0, line, LINE_MAX\n"
              " mov rdi, line\n"
              " call strlen\n"
              " mov rdi, line\n"
              " mov rsi, rax\n"
              " call parse_uint\n",
              stdout);
    }
    else if (term->type == TERM_IDENT)
    {
        const int slot = find_variable(variables, term->value);
        printf(" mov rax, qword [rbp - %d]\n", slot * 8 + 8);
    }
    else if (term->type == TERM_INT)
    {
        printf(" mov rax, %s\n", term->value);
    }
}
// Prints to stdout the assembly that evaluates `expr` and leaves the result
// in rax.
//
// For a sum, the left operand is saved on the stack while the right operand
// is evaluated. It used to be parked in rdx, but the code term_asm emits for
// an `input` term performs a read (whose length argument travels in rdx on
// Linux x86-64) and calls strlen/parse_uint, so rdx does not survive and
// `x + input` produced a wrong sum. The push/pop pair is balanced, so the
// rbp-relative variable slots are unaffected.
void expr_asm(struct expr_node *expr, ds_dynamic_array *variables)
{
    switch (expr->type)
    {
    // single term: its value is the expression's value
    case EXPR_TERM:
    {
        term_asm(&expr->term, variables);
        break;
    }
    // lhs + rhs
    case EXPR_PLUS:
        term_asm(&expr->add.lhs, variables);
        printf(" push rax\n");
        term_asm(&expr->add.rhs, variables);
        printf(" pop rdx\n");
        // rax = rhs + lhs
        printf(" add rax, rdx\n");
        break;
    }
}
// Prints to stdout the assembly that evaluates `rel` and leaves 1 in rax
// when the relation holds, 0 otherwise.
//
// The left operand is kept on the stack while the right operand is
// evaluated: the code emitted for an `input` term performs a read and calls
// helper routines, which does not preserve rdx (the read length is passed in
// rdx on Linux x86-64), so holding lhs in rdx gave wrong comparisons for
// `x < input`.
// NOTE(review): setl is a signed comparison while values are read and
// written through parse_uint/write_uint — confirm the intended signedness.
void rel_asm(struct rel_node *rel, ds_dynamic_array *variables)
{
    switch (rel->type)
    {
    case REL_LESS_THAN:
        term_asm(&rel->less_than.lhs, variables);
        printf(" push rax\n");
        term_asm(&rel->less_than.rhs, variables);
        printf(" pop rdx\n");
        // compare lhs (rdx) with rhs (rax)
        printf(" cmp rdx, rax\n");
        // al = 1 when lhs < rhs, else 0
        printf(" setl al\n");
        // widen the flag in al to the whole of rax
        printf(" and al, 1\n");
        printf(" movzx rax, al\n");
        break;
    }
}
// Prints to stdout the assembly for one instruction.
// `variables` maps variable names to stack slots (slot i lives at
// [rbp - (i * 8 + 8)]); `*if_count` numbers the `.endifN` labels and is
// incremented once for every if instruction emitted.
void instr_asm(struct instr_node *instr, ds_dynamic_array *variables,
               int *if_count)
{
    switch (instr->type)
    {
    case INSTR_ASSIGN:
    {
        // evaluate into rax, then store into the variable's slot
        expr_asm(&instr->assign.expr, variables);
        const int slot = find_variable(variables, instr->assign.ident);
        printf(" mov qword [rbp - %d], rax\n", slot * 8 + 8);
        break;
    }
    case INSTR_IF:
    {
        // condition into rax; skip the guarded instruction when it is zero
        rel_asm(&instr->if_.rel, variables);
        const int id = (*if_count)++;
        printf(" test rax, rax\n"
               " jz .endif%d\n",
               id);
        instr_asm(instr->if_.instr, variables, if_count);
        printf(".endif%d:\n", id);
        break;
    }
    case INSTR_GOTO:
    {
        printf(" jmp .%s\n", instr->goto_.label);
        break;
    }
    case INSTR_OUTPUT:
    {
        // value into rax, then hand it to write_uint
        term_asm(&instr->output.term, variables);
        fputs(" mov rdi, 1\n"
              " mov rsi, rax\n"
              " call write_uint\n",
              stdout);
        break;
    }
    case INSTR_LABEL:
        printf(".%s:\n", instr->label.label);
        break;
    }
}
// Verifies that a term only refers to a known variable.
// Input and integer terms need no check; an identifier term must already be
// present in `variables`, otherwise the function panics. Despite its name,
// this function never adds anything to `variables`.
void term_declare_variables(struct term_node *term,
                            ds_dynamic_array *variables)
{
    if (term->type != TERM_IDENT)
    {
        return;
    }
    if (find_variable(variables, term->value) >= 0)
    {
        // already declared
        return;
    }
    DS_PANIC("Identifier is not defined %s", term->value);
}
// Runs the variable check of term_declare_variables over every term of an
// expression: the single term, or both operands of a sum.
void expr_declare_variables(struct expr_node *expr,
                            ds_dynamic_array *variables)
{
    if (expr->type == EXPR_TERM)
    {
        term_declare_variables(&expr->term, variables);
    }
    else if (expr->type == EXPR_PLUS)
    {
        term_declare_variables(&expr->add.lhs, variables);
        term_declare_variables(&expr->add.rhs, variables);
    }
}
// Runs the variable check of term_declare_variables over both operands of a
// relational expression.
void rel_declare_variables(struct rel_node *rel, ds_dynamic_array *variables)
{
    if (rel->type == REL_LESS_THAN)
    {
        term_declare_variables(&rel->less_than.lhs, variables);
        term_declare_variables(&rel->less_than.rhs, variables);
    }
}
// Collects the variables an instruction declares and checks the ones it uses.
//  - assignment: the right-hand side is checked first (so a variable cannot
//    be used in its own first assignment), then the target is added to
//    `variables` unless it is already there
//  - if: the condition is checked, then the guarded instruction is processed
//  - output: the term is checked
//  - goto / label: nothing to do
// Panics on an undefined identifier or when the variable cannot be appended
// (the append result is now checked, as lexer_tokenize does for tokens).
void instr_declare_variables(struct instr_node *instr,
                             ds_dynamic_array *variables)
{
    switch (instr->type)
    {
    case INSTR_ASSIGN:
    {
        expr_declare_variables(&instr->assign.expr, variables);
        if (find_variable(variables, instr->assign.ident) >= 0)
        {
            // already declared
            break;
        }
        // the array stores the name pointer itself, hence the address-of
        if (ds_dynamic_array_append(variables, &instr->assign.ident) != 0)
        {
            DS_PANIC("Failed to append variable to array");
        }
        break;
    }
    case INSTR_IF:
    {
        rel_declare_variables(&instr->if_.rel, variables);
        instr_declare_variables(instr->if_.instr, variables);
        break;
    }
    case INSTR_GOTO:
    {
        break;
    }
    case INSTR_OUTPUT:
    {
        term_declare_variables(&instr->output.term, variables);
        break;
    }
    case INSTR_LABEL:
        break;
    }
}
// Prints the complete assembly program for the AST to stdout.
//
// Pass 1 walks the instructions to collect and check variables; the output
// then consists of the file header, a stack frame with one 8-byte slot per
// variable, the code for every instruction in order, frame teardown, an exit
// with status 0 (syscall 60) and the writable segment holding the `line`
// input buffer.
void program_asm(struct program_node *program)
{
    int if_count = 0;
    ds_dynamic_array variables;
    ds_dynamic_array_init(&variables, sizeof(char *));

    // pass 1: declare/check variables
    for (unsigned int i = 0; i < program->instrs.count; i++)
    {
        struct instr_node instr;
        ds_dynamic_array_get(&program->instrs, i, &instr);
        instr_declare_variables(&instr, &variables);
    }

    // The frame size comes from an array count that the loops above treat
    // as unsigned; convert once and print with the matching %u specifier
    // rather than %d.
    const unsigned int frame_bytes = (unsigned int)(variables.count * 8);

    // header
    printf("format ELF64 executable\n");
    printf("LINE_MAX equ 1024\n");
    printf("segment readable executable\n");
    printf("include \"linux.inc\"\n");
    printf("include \"utils.inc\"\n");
    printf("entry _start\n");
    printf("_start:\n");

    // stack frame: one 8-byte slot per variable, addressed relative to rbp
    printf(" mov rbp, rsp\n");
    printf(" sub rsp, %u\n", frame_bytes);

    // pass 2: code for every instruction
    for (unsigned int i = 0; i < program->instrs.count; i++)
    {
        struct instr_node instr;
        ds_dynamic_array_get(&program->instrs, i, &instr);
        instr_asm(&instr, &variables, &if_count);
    }

    // release the frame and exit
    printf(" add rsp, %u\n", frame_bytes);
    printf(" mov rax, 60\n");
    printf(" xor rdi, rdi\n");
    printf(" syscall\n");

    // data segment with the input line buffer
    printf("segment readable writeable\n");
    printf("line rb LINE_MAX\n");
}
int main()
{
char *buffer = NULL;
// read input file
int length = ds_io_read_file(NULL, &buffer);
// initialize token array