-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparsercodegen.c
2130 lines (1870 loc) · 74.6 KB
/
parsercodegen.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Team Leader: Tyler Crawford
// Member: Nelson Herrera Gamboa
// Class: COP3402
// Date of Last Edit: 2/15/2023
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define CMAX 11 // maximum number of chars for idents
#define STRMAX 125 // Assuming max codeline length is 50
#define MAX_NAME_TABLE_SIZE 500
//==================================================================================================================================================================//
//==================================================================================================================================================================//
//==================================================================================================================================================================//
char* lexicalParse(char* codeLine);
int numberOfFileLines(char* filename);
int characterInSymbolTableBS(char c, char* symTbl);
int isStatementReserved(char* word);
char* subString(int start, int end,char* line);
int isWordValid(char* word);
void PROGRAM();
void BLOCK();
char* GET_Token();
int Get_TokenInteger();
void CONST_DECLARATION();
void VAR_DECLARATION();
int SYMBOLTABLECHECK(char* name);
void EXPRESSION();
void STATEMENT();
void CONDITION();
void TERM();
void FACTOR();
void printSymTbl();
void printAssCodes();
char* assemblyConvertOP(int OP,int M);
void freeAll();
//==========================================================================================================================
//==========================================================================================================================
char* global_Lexeme;
int universialIndex = 0;
int tokencount = 0;
int universalCodeText = 1;// This keeps track of how many opcodes we have (This should be "Line" in the terminal)
int universalCodeAddress = 3;// This keeps track of the addresses in the assembly text, we start at 3 for the 0 0 0 then we go and read the next opcode
int variableCount = 0;// This keeps track of how many Variables have been stored into the Symbol table
int universalSymbolIndex = 2;// This keeps track of what index we must store into next. This also is where we start our search. Searching starts at universal Symbol Index and decrements until 0.
typedef struct{
int kind; //const = 1, var = 2, proc = 3. -- proc is not in use for now.
char name[12]; // name up to 11 chars not including '\0'.(identifer)
int val; // number (ASCII value)
int level; // L level -- assume 0 for the most part
int adr; // M address
int mark; // 0 = in use for code generation, 1 = unavailable.
} namerecord_t;
// This struct will be used to store the opcodes as be "emit" them to the screen for processing later.
typedef struct{
int OP; // name up to 11 chars.(identifer)
int L; // number (ASCII value)
int M; // L level
} assembly_Node;
// Function Signitures for functions that will insert structs into the arrays below.
namerecord_t* initializeNameRecord(int _kind, char* _name, int _val, int _level, int _adr, int _mark);
assembly_Node* initializeAssemblyRecord(int OP, int L, int M);
// Arrays that will hold the structs.
namerecord_t* symbol_Table[MAX_NAME_TABLE_SIZE];
assembly_Node* assembly_Code[MAX_NAME_TABLE_SIZE];// this is where we will be storing the the assembly code
int TOKEN = -1;
//==========================================================================================================================
//==========================================================================================================================
typedef enum {
identsym = 2,
numbersym = 3,
plussym = 4 ,
minussym = 5,
multsym = 6,
slashsym = 7,
oddsym = 8,
eqsym = 9,
neqsym = 10,
lessym = 11,
leqsym = 12,
gtrsym = 13,
geqsym = 14,
lparentsym = 15,
rparentsym = 16,
commasym = 17,
semicolonsym = 18,
periodsym = 19,
becomessym = 20,
beginsym = 21,
endsym = 22,
ifsym = 23,
thensym = 24,
whilesym = 25,
dosym = 26,
constsym = 28,
varsym = 29,
writesym = 31,
readsym = 32,
}token_type;
//==================================================================================================================================================================//
//==================================================================================================================================================================//
//==================================================================================================================================================================//
char* resWords[] = {"odd", "begin", "end", "if", "then", "while", "do", "const", "var", "write", "read"};
int resWordsTokens[] = {oddsym, beginsym, endsym, ifsym, thensym, whilesym, dosym, constsym, varsym, writesym , readsym};
//Symbol table is in order of ascii value, can be used with binary search
char symbolTableOrdered[] = {'\t','\r',' ','(',')','*', '+',',', '-' ,'.', '/', '0', '1', '2','3', '4', '5', '6', '7', '8', '9', ':', ';', '<',
'=', '>','A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z','a','b','c','d','e','f','g','h','i','j','k','l' ,'m' ,'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
'v', 'w', 'x', 'y', 'z'};
//Symbols which are essentially breakpoints and line enders // is not in here, a lookahead check is necessary for that one
char specialTerminalSymbolsOrdered[] = {'\t','\r',' ', '(',')','*', '+',',', '-' ,'.', '/', ':', ';','<','=','>','~'}; // ' ' isn't a term sym, it was put here
int specialTerminalSymbolsTokens[] = {-3,-2,-1, lparentsym, rparentsym, multsym, plussym, commasym, minussym ,periodsym, slashsym, 0, semicolonsym ,lessym, eqsym, gtrsym,0}; // -1 is for spaces and 0 is for colons and -2 for tabs,
int halt_flag = 1;
int EndProgramFlag = 1;
//==================================================================================================================================================================//
//==================================================================================================================================================================//
//==================================================================================================================================================================//
// char letters[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
// 'X', 'Y', 'Z','a','b','c','d','e','f','g','h','i','j','k','l' ,'m' ,'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
// 'v', 'w', 'x', 'y', 'z'};
// char numbers[] = {'0', '1', '2','3', '4', '5', '6', '7', '8', '9'};
//==================================================================================================================================================================//
//==================================================================================================================================================================//
//==================================================================================================================================================================//
char* lexicalParse(char* codeLine)
{
//Copy space of parent string
char* parsedString = malloc(sizeof(codeLine));
parsedString[0] = '\0';
//Start is set to zero as no special characters have been detected yet
int start = 0;
for(int i = 0; i<strlen(codeLine);i++)
{
//Check if character is valid within the symbol table
//Check if the next character from current index is a special symbol
int lookAheadPlus = characterInSymbolTableBS(codeLine[i+1], symbolTableOrdered);
int lookAhead = characterInSymbolTableBS(codeLine[i+1], specialTerminalSymbolsOrdered);
//Check if the current character from current index is a special symbol
int specialIndex = characterInSymbolTableBS(codeLine[i], specialTerminalSymbolsOrdered);
int current = characterInSymbolTableBS(codeLine[i], symbolTableOrdered);
// reserved index set to -1, used only if a reserved word is detected in deeper code
// where it would take up the index of a reserved word in resWords that connects to
// resWordsTokens in a 1 to 1 relationship.
int reservedIndex = -1;
// If look ahead is a special character or i is on the last iteration
// try to make a word from indexes start to i+1 exclusively (i+1 is where the
// special char is)
if(current == -1)
{
char invalid_string[4];
invalid_string[0] = '2';
invalid_string[1] = ' ';
invalid_string[2] = codeLine[i];
invalid_string[3] = '\0';
strcat(parsedString, invalid_string);
strcat(parsedString, " ");
start = start +1;
//continue;
}
else if((lookAhead != -1 || lookAheadPlus == -1)|| i == strlen(codeLine)-1 || (lookAhead == -1 && (specialIndex != -1)))
{
// asdfas % *
// Substring attempts to create a word that is either a reserved word or
// identifier. The substring will return null if the start is a special character
char* word = subString(start, i+1 ,codeLine);
//token is set as default to the identifier token value
int token = identsym;
// If the word exists check if the word is both valid or a reserved word.
// If it is a resseved word, tokenize it, if it is not valid, stop the parse,
// free the WIP parse string and entirely and return null from the entire function
// to indicate an error occured. Prints out to console on what type of error occured
// on the given line, before exiting on error.
if(word != NULL)
{
//checks if word is valid, errors will return null
int valid = isWordValid(word);
//printf("%s\n",word);
reservedIndex = isStatementReserved(word);
if(reservedIndex != -1)
{
token = resWordsTokens[reservedIndex];
}
if(valid == 2 || valid == -3)
{
token = numbersym;
}
start = i + 1;
//if(lookAheadPlus == -1) start = start + 1;
}
else
{
// Take the index of the special character in the codeline (that failed to create a word) and
// find what special char token it corresponds to
specialIndex = characterInSymbolTableBS(codeLine[start], specialTerminalSymbolsOrdered);
token = specialTerminalSymbolsTokens[specialIndex];
//printf("TOKEN LOOKING FOR 13 %d\n",token);
// if(specialIndex == -1)
// {
// token = -1;
// }
// Some tokens can merge with other tokens to create special cases.
// This checks for those and merges the tokens accordingly and shifts the
// start and i by 1, as in this case we "truncate" the string by one char
// when completeing a merge.
// The switch also tests for comments to ignore when tokenizing, IT DOESN'T CHECK CONTENT
// of comments. Invalid characters could be inside the commments. Also, it doesn't assume
// there to be nested comments like: /*/**/*/
switch(token)
{
case 0:
if(codeLine[start+1] == '=')
{
token = becomessym; // :=
start = start + 1;
i++;
}
break;
case lessym:
if(codeLine[start+1] == '=')
{
token = leqsym; // <=
start = start + 1;
i++;
}
else if(codeLine[start+1] == '>')
{
token = neqsym; // <>
start = start + 1;
i++;
}
break;
case gtrsym:
if(codeLine[start+1] == '=')
{
token = geqsym; // >=
start = start + 1;
i++;
}
break;
case slashsym: // Slashsym is for /
if(codeLine[start+1] == '*') // checks for a /* case to indicate a comment start
{
token = -1; // From comment start onwards is to be ignored, so token is set to -1
int commentError = 1;
// Check to see if based on where the comment start is, if it would be
// possible to have a */ in the remaining space of the codeLine passed in
if((strlen(codeLine) - (i+1))>=2)
{
//Loop through and check to find if a corresponding */ exists
for(int x = start+2; x<strlen(codeLine)-1;x++)
{
char twoChars[] = {codeLine[x],codeLine[x+1]}; // probably needs to change to checking only last two chars,
if(twoChars[0] == '*' && twoChars[1]== '/') // instead of checking whole thing
{
commentError = 0;
// if the comment ends on the last character of the codeLine
// break out and end the parse.
if(x+1 == strlen(codeLine)-1)
{
halt_flag = 0;
}
else
{
start = x+1;
i = x+1;
}
break;
}
}
}
if(commentError) // no end of comment was found, return an error
{
token = -5;
start = start + 1;
i++;
//printf("Code line: '%s' has an unresolved comment, not ended with '*/' \n", codeLine);
//free(parsedString);
//return NULL;
}
}
break;
}
if(halt_flag == 0)
{
halt_flag = 1; // reset the halt flag
break;
}
start = start + 1; // increment start
}
// Negative tokens are to be ignored, only positive are to be added on the string
if(token > 0 || token == -5)
{
char int_string[4];
sprintf(int_string, "%d ",token);
strcat(parsedString, int_string);
if(token == identsym || token == numbersym || reservedIndex != -1)
{
if(reservedIndex == -1)
{
strcat(parsedString, word);
strcat(parsedString, " ");
}
}
}
// if(token == 2 && characterInSymbolTableBS(word[0], symbolTableOrdered) == -1)
// {
// start = start + 1;
// }
}
// else
// {
// if(current == -1)
// {
// char invalid_string[4];
// invalid_string[0] = '2';
// invalid_string[1] = ' ';
// invalid_string[2] = codeLine[i];
// invalid_string[3] = '\0';
// strcat(parsedString, invalid_string);
// strcat(parsedString, " ");
// //continue;
// }
// }
}
return parsedString;
}
int isStatementReserved(char* word)
{
int retval = -1;
for(int x = 0; x < 11; x++)
{
if(strcmp(word,resWords[x]) == 0)
{
retval = x;
break;
}
}
return retval;
}
int isWordValid(char* word)
{
int retval = 1;
//If it starts with an integer make sure it only has integers
if(word[0] >= '0' && word[0] <= '9')
{
for(int x = 1; x < strlen(word); x++)
{
if(word[x] < '0' || word[x] > '9')
{
retval = -2;
break;
}
}
//If the integer is longer than 5 digits
if(retval != -2)
{
if(strlen(word)>5)
{
retval = -3;
}
else
{
retval = 2;
}
}
}
//If the word is longer than CMAX characters
if(strlen(word) > CMAX && retval == 1)
{
retval = -1;
}
return retval;
}
char* subString(int start, int end,char* line)
{
if(characterInSymbolTableBS(line[start], specialTerminalSymbolsOrdered) != -1
&& characterInSymbolTableBS(line[start], symbolTableOrdered) != -1) return NULL;
char* word = malloc(sizeof(char)*(end-start+1));
word[0] = '\0';
for(int i = start; i<end;i++)
{
if(characterInSymbolTableBS(line[i], symbolTableOrdered) != -1)
{
word[strlen(word)] = line[i];
word[strlen(word) + 1] = '\0';
}
if(characterInSymbolTableBS(line[i], symbolTableOrdered) == -1)
{
break;
}
}
if(word[0] == '\0')
{
return NULL;
}
else
{
return word;
}
}
int numberOfFileLines(char* filename)
{
FILE *fp = fopen(filename, "r");
int numLines = 1;
char ch = ' ';
while ((ch = fgetc(fp)) != EOF)
{
if (ch == '\n')
{
numLines++;
}
}
fclose(fp);
return numLines;
}
int characterInSymbolTableBS(char c, char* symTbl)
{
int low = 0;
int high = strlen(symTbl)-1;
while (low <= high)
{
int mid = (high + low) / 2;
if (symTbl[mid] == c) return mid;
else if (symTbl[mid] > c) high = mid - 1;
else low = mid + 1;
}
return -1;
}
int binarySearch(int arr[], int left, int right, int x) {
while (left <= right) {
int mid = left + (right - left) / 2;
// Check if x is present at mid
if (arr[mid] == x)
return mid;
// If x greater, ignore left half
if (arr[mid] < x)
left = mid + 1;
// If x is smaller, ignore right half
else
right = mid - 1;
}
// If we reach here, then element was not present
return -1;
}
void freeAll()
{
for(int i = 0; i<MAX_NAME_TABLE_SIZE;i++)
{
if(symbol_Table[i] != NULL)
{
free(symbol_Table[i]);
}
if(assembly_Code[i] != NULL)
{
free(assembly_Code[i]);
}
if(assembly_Code[i] == NULL && symbol_Table[i] == NULL)
{
break;
}
}
}
//==================================================================================================================================================================//
//==================================================================================================================================================================//
//==================================================================================================================================================================//
//Project Questions//
/*
> When we are inserting a procedure into the symbol table, why do we leave its address to a question mark? Answer: we will not be working linearly
> How will be connecting our vm and our lexeme?
> how exactly should we be changing scanner for the future assignments? answer: remove whatever is not in the grammar and view them as identifiers.
> How many lines of assmebly code will we have? For the assembly code struct?
> Operations MUL 0 3 -OR- OPP 0 3
> will we have to create a little vm with a stack within this code to keep track of where things are in memory. Answer: handles itself
> on page 11 of the HW3 why does it include modsym when that was not apart of HW2?
> CAN WE BE CERTAIN ADDRESSES for assembly codes like jmp and jpc are 3 incremented. Eg. after each code address increment by 3 like in the vm assignment
> When "emmiting", are we printing the assembly code to the screen or are we just storing it for later printing. For example, in the case that there is an error should have printed the valid assembly code to the screen that preceded that error.
>if token == whilesym
get next token
loopIdx = current code index <----- what index is this refering to? Code PL?
CONDITION
if token != dosym
error
get next token
jpcIdx = current code index <--
emit JPC
STATEMENT
emit JMP (M = loopIdx)
code[jpcIdx].M = current code index <--
return
>if token == ifsym
get next token
CONDITION
jpcIdx = current code index
emit JPC
if token != thensym
error
get next token
STATEMENT
code[jpcIdx].M = current code index
return
> what is CURRENT CODE INDEX and what is code[], it is represented as a struct here.
> why do we need to have jumps if we are not using procedures and why are they apart of if
> what is meant by "emit read" and "emit write" ? IF THE ERROR STATEMENT IS ALL THAT IS PRINTED ON ERROR,
> what do we do for all the sudo he put up?
*/
//======================Project Rules======================//
/*
// Symbol Table
> When seeking a variable name during a lookup. The seek pointer should be pointing at the same location as the table pointer(tp), and then iterate the back towards index 0. (seek = tp)
> When we call a new procedure the we must set mark == 1 for all of the variables in the previous procedure.s
> Everytime a variable is called we must complete a lookup throught the Symbol table
> If we try to use a variable that was not declared we can use the sentinel technique. This technique is where we leave the index 0 of the array empty to store the name of the varible we are seeking so that if that variable is found and seek == 0, then the variable was not declared and an error in the message must be emmited.
//Grammar/Syntax (Context Free Grammar)
> to check for sytanx we will create function that check sytax rules of each section of the code. Each time we call a function we must check if the rules are followed or print an error to the screen if the sytnax is wrong. This will allow us to check the sytanx of many differnt looking peices of code.
> A context free language is defined by a 4-tuple(T,N,R,S)
1 terminal symbols (valid words that are a within syntatic class)(T)
2 non-terminal Symbols (aka the syntatic classes)(N)
3 Syntatic equations(R)
4 Start Symbol(S)
> rule get a token and call the <program> function all the way down to a terminal symbol or sytanx error. By doing this we create a sort of tree structure more specifically inorder search.
> basically while parsing the lexeme token by token we will use rules (also known as left most derivation).
{Example code}
<expr> {
if ( "(" == token) {
call <expr>
if ( ")" == token){
return;
} else {
}
}
...
...
...
}
> Remove any keywords that are not part of grammer inside of scanner
> Scanner and Parser should be within the same program.
{Example Code}
Begin
statement;
statment;
statment
end
(equals)
Begin
statement;
statment;
end
lexeme: 21 xx 18 xx 18 xx 22
lexeme: 21 xx 18 xx 18 22
> when we print out the symbol table the mark on everything should be 1
// ASSEMBLY CODE PART
> We will have two datastructures one will be the stack that will be linear since there are no
procedures and therefore no jumps and we will also have the text which is the assembly code which we will print out from our the dataStructure of our choosing.
> Assembly code jumps by 3
> Stack goes up by one
> SL DL RV | X Y Z TY
> Assembly code: 7 0 3
// Symbol table
// assembly code increment for addresses is linear increment by 3 always. However, variable count
// can affect the M values that get plugged for lod and sto when adding it into struct
y := 7 * (u * 3)
y := 7 * 3u
y := 21u
===========================
{Ambiguity}
order of operations (left most derivation)
a + b * c
is should produce the same output as
b * c + a
but with how our program is, the first one will do (a+b)*c when it should be
a+(b*c).
with in expression() and term() we must make it so that multiplication has precedence over addition. Look at pg 20 in slides and 23. it is already built in within the psuedocode.
===========================
{Recursive Descent parsing with left recursion}
> basically if we do not have basecases within our recursive function we could end up having a stack overflow.
===========================
{Left factoring}
> This causes a compiler to be slower
{example}
we will not want this:
IF<C>THEN<ST>
IF<C>THEN<ST>ELSE<ST>
instead we will do:
IF<C>THEN<ST><X>
where <x> ::= ELSE<ST> | STOP
> What this does it remove the rule of backtracking making our compiler slower.
> we basically need to create a method that acts as the basecase. Exam question
===========================
{very easy test case}
var a, b,c; // populate the symbol table
a := b + c.
{output should be}
LOD 0 4
LOD 0 5
ADD 0 2
STO 0 3
SYS 0 3
===========================
We have to update our code so that it follows left factoring.
==================After class questions answers============
> For this program we will print to the terminal like we saw in the HW3 file. PL/0 the code and then the assembly code. But that is not all. Along with printing each line of assembly code, we must store that code in an array/datastructure and save it to the file where for example LOD 0 3 -> 7 0 3. This is so that we can run our assembly code within the vm.
> For this project we do not need to create a stack nor do we need to have the the text(assembly code)
written somewhere for us to parse. Instead, we must just keep track of where we are with in those concepts so that when we run our assembly code within the vm, the stack will be created properly. Therefore the address of variables etc. must increment by 1, and the address of the assembly code(text) must increment by 3. That is all we have to worry about in this project.
> The psuedo code he gave us already takes care of the properly perfoming Recursive Descent parsing with left recursion to avoid infinite recurisve loops.
===================================
> Syntax Graphs
A way to represent the the function of grammar.
===================================
>We can take out emmit positve and negative
and he will send out an announcement.
*/
//==========Functions===========//
// >use a universal index that all the below functions can edit,this will go to the end of codePL
// >it will return the NEXT token in code PL
// > it should be able to either ignore the variable identifier names to go to the next token, or deal with it directly.
// Ignoring it may be better, as a seperate function could be responable for grabbing the identifer from codePL
// and thereafter incrementing the universal index appropriately. (helper function could be GRAB_NAME(index))
//universialIndex
char* GET_Token()
{
// printf("\n\nEntering the GET_Token\n");
// initializing variables
char* RETVAL = (char*) malloc(sizeof(char) * 12);
// printf("This is Global Lexeme: %s\n", global_Lexeme);
int length = strlen(global_Lexeme);
int foundIt = 0;
//initializing token and word
char* token = (char*) malloc(sizeof(char) * 1000);
token[0] = '\0';
char specialCharacter;
int index = 0;
int tokenToInt;
char stopChar = ' ';
if(universialIndex == length - 1)
{
RETVAL = NULL;
return RETVAL;
}
// printf("This is universalIndex: %d\n", universialIndex);
for(int i = universialIndex; i < length ;i++, universialIndex++)
{
// printf("We have entered the for loop\n");
// if we see a space then we continue.
if (global_Lexeme[i] == ' ')
{
continue;
}
strncat(token, global_Lexeme + i, 1);
// printf("When i is %d token is %s\n", i, token);
// continue if we have multiple numbers/chars
if (global_Lexeme[i+1] != ' ') {
continue;
}
if (strcmp(token, "20") == 0) {
//copy whatever the token is to RETVAL
strcpy(RETVAL, token); // is this psossible when retval destination is null? I will check it
memset(token, '\0', 1000);
break;
}
if (strcmp(token, "10") == 0) {
//copy whatever the token is to RETVAL
strcpy(RETVAL, token);
memset(token, '\0', 1000);
break;
}
if (strcmp(token, "12") == 0) {
//copy whatever the token is to RETVAL
strcpy(RETVAL, token);
memset(token, '\0', 1000);
break;
}
if (strcmp(token, "13") == 0) {
//copy whatever the token is to RETVAL
strcpy(RETVAL, token);
memset(token, '\0', 1000);
break;
}
if (strcmp(token, "14") == 0) {
//copy whatever the token is to RETVAL
strcpy(RETVAL, token);
memset(token, '\0', 1000);
break;
}
if (strcmp(token, "-5") == 0) {
//copy whatever the token is to RETVAL
strcpy(RETVAL, token);
memset(token, '\0', 1000);
break;
}
if (strcmp(token, "3") == 0) {
//copy whatever the token is to RETVAL
strcpy(RETVAL, token);
memset(token, '\0', 1000);
break;
}
// done
if (strcmp(token, "2") == 0) {
strcpy(RETVAL, token);
memset(token, '\0', 1000);
break;
}
// printf("We are about to check the specialSymbolTokens for loop token is %s\n", token);
index = -1;
tokenToInt = atoi(token);
// printf("when token is %s tokenToInt is %d\n", token, tokenToInt);
if (strcmp(token, "0") == 0 || tokenToInt < 0 || tokenToInt > 0)
{
for (int j = 0; j < 15; j++) {
if (specialTerminalSymbolsTokens[j] == tokenToInt)
{
// store token in retval
strcpy(RETVAL, token);
memset(token, '\0', 1000);
foundIt = 1;
break;
}
}
if(foundIt)
{
break;
}
}
// for (int j = 0; j < 15; j++) {
// if (specialTerminalSymbolsTokens[j] == tokenToInt)
// {
// // store token in retval
// strcpy(RETVAL, token);
// memset(token, '\0', 1000);
// break;
// }
// }
// printf("We just left the specialSymbolTokens for loop token is %s\n", token);
// printf("We are about to check the resWordsTokens for loop token is %s\n", token);
// printf("PASSING\n");
index = binarySearch(resWordsTokens, 0, 15, tokenToInt);
if(index != -1) {
// store token in retval
strcpy(RETVAL, token);
memset(token, '\0', 1000);
break;
}
// printf("We just left thye resWordsTokens for loop token is %s\n", token);
// printf("doing normal strcpy when token is %s\n", token);
strcpy(RETVAL, token);
memset(token, '\0', 1000);
break;
}
free(token);
// printf("about to return\n");
universialIndex++;
// printf("This is RETVAL before returning %s\n", RETVAL);
return RETVAL;
}
int Get_TokenInteger()
{
//printf("GET_TokenInteger enter\n ");
char* temp = GET_Token();
if(temp != NULL)
{
tokencount++;
//printf("Get token: %s\n", temp);
int stringLength = strlen(temp);
//printf("Get token len: %d\n", stringLength);
for(int i = 0 ; i<stringLength;i++)
{
if(isdigit(temp[i]) == 0)
{
return -10;
}
}
int ret = atoi(temp); // frees for you apparently, when I had free temp after this, it gave me a double free exception
free(temp);
return ret;
}
return -11;
}
void PROGRAM()
{
namerecord_t* newCode = initializeNameRecord(0," ", 0, 0, 0, 0);
symbol_Table[0] = newCode;
newCode = initializeNameRecord(3,"main", 0, 0, 3, 1);
symbol_Table[1] = newCode;
assembly_Node* newAssCode;
newAssCode = initializeAssemblyRecord(7, 0, 3);
//printf("%d JMP 0 3\n\n",universalCodeText);
assembly_Code[0] = newAssCode;
//universalCodeText++;
//universalCodeAddress += 3;
//printSymTbl();
//printf("Program %d\n",TOKEN);
while (TOKEN != endsym)
{
BLOCK();
if(TOKEN == endsym)
{
break;
}
int temp = TOKEN;
TOKEN = Get_TokenInteger();
if (TOKEN == -11)
{
TOKEN = temp;
break;
}
}
//printf("TOKEN is %d after While loop\n", TOKEN);
//printf("reentering Block with %d\n", TOKEN);
//Call token looking for '.' ENDING PROGRAM
//printf("out Token Grab While %d\n",TOKEN);
TOKEN = Get_TokenInteger();
while(TOKEN == endsym) TOKEN = Get_TokenInteger();
//printf("Block post token grab %d\n", TOKEN);
if (TOKEN != periodsym || TOKEN == -11)
{
printf("Error: program must end with period\n");
exit(0);
}
else
{
//printf("YAYYYY WE MADE IT TO THE PERIOD EXTIO!!!!\n");
// Store Assembly SYS END is added
assembly_Node* newAssCode;
newAssCode = initializeAssemblyRecord(9, 0, 3);
//printf("%d SYS 0 3\n",universalCodeText);
assembly_Code[universalCodeText] = newAssCode;
universalCodeText++;
universalCodeAddress += 3;
//Print Assembly code
printAssCodes();
printSymTbl();
}