You are on page 1of 23

Language Processors

SOLUTION MANUAL
VI Sem, BE (CS&E)

JayaKrishna R
Faculty, Dept. of CSE,
MIT, Manipal.

DEPT OF COMPUTER SCIENCE & ENGG.

M. I. T., MANIPAL
WEEK 3 – 11: Design of Mini Compiler for C Language for the given subset

The formal grammar (sample):

Program → main () { declarations statement-list }

declarations → data-type identifier-list ; declarations | ε

data-type → int | char

identifier-list → id | id , identifier-list | id[number] , identifier-list | id[number]

statement_list → statement ; statement_list | ε

statement → assign-stat | decision-stat | looping-stat

assign_stat → id = expn

expn → simple-expn eprime

eprime → relop simple-expn | ε

simple-expn → term seprime

seprime → addop term seprime | ε

term → factor tprime

tprime → mulop factor tprime | ε

factor → id | num

decision-stat → if ( expn ) stat dprime

dprime → else stat | ε

looping-stat → while ( expn ) stat | for ( assign_stat ; expn ; assign_stat ) stat

relop → == | != | <= | >= | > | <

addop → + | -

mulop → * | / | %

Language Processors
Page 2
WEEK 3 & 4: Design of Lexical analyzer

To construct an adhoc Lexical Analyzer.

 Identifying different classes of tokens like: keywords, identifiers and special


symbols.
 Selecting a suitable data structure for symbol table (the alternates are linked
list, hashing, array of structures, binary search tree)
 Having selected a data structure, identifying the appropriate fields.

Solution:

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#define KEY_CTR 12
FILE *fin, *fout;
char line[80],*ptr,filename[20],token[30];

/* Token classes returned by scanner(). */
enum token_class {KEY=20,IDENTIFIER,DIGIT,COMMA,SEMICOLON,COLON,DOT,
                  EQ,GT,LT,EQUEQU,GEQ,LEQ,NEQ,LB,RB,LSB,RSB,LCB,RCB,
                  INCR,DECR,SQUOTE,DQUOTE,ADD,SUB,MUL,DIV,MOD,HASH,
                  DEFAULT,UNDEF};

/* Reserved words; scanner() stores the matching row index in KEYtype. */
char keywords[KEY_CTR][10] =
    {"void","int","char","float","if","else","while","include","for","break",
     "continue","main"};

/* Index into keywords[] of the keyword most recently returned as KEY. */
int KEYtype;

/*
 * Helper for two-character operators. The first character has already been
 * consumed and stored in token[0]. If the next character is `second`, it is
 * consumed too, appended to token, and `pair_tok` is returned; otherwise
 * `single_tok` is returned and nothing more is consumed.
 */
static int match_second(char second, int pair_tok, int single_tok)
{
    if (*ptr == second) {
        token[1] = second;
        token[2] = '\0';
        ptr++;
        return pair_tok;
    }
    return single_tok;
}

/*
 * Returns the class of the next token in the buffer addressed by the global
 * `ptr`, advancing `ptr` past it and leaving its text in the global `token`.
 *
 * - Blanks and tabs are skipped.
 * - Keywords return KEY and set KEYtype to their index in keywords[].
 * - Identifiers are letter-led alphanumeric runs; numbers are digit runs.
 *   Text longer than token[] can hold is consumed but truncated.
 * - At the terminating NUL the function returns UNDEF without advancing, so
 *   repeated calls never step outside the buffer.
 * - Any other unrecognised character is consumed and reported as UNDEF.
 */
int scanner()
{
    size_t i = 0;

    while (*ptr == ' ' || *ptr == '\t') { ptr++; }

    token[0] = *ptr;
    token[1] = '\0';

    switch (*ptr) {
    case '\0': return UNDEF;                 /* end of buffer: stay put */

    /* single-character tokens */
    case '#' : ptr++; return HASH;
    case '(' : ptr++; return LB;
    case ')' : ptr++; return RB;
    case '{' : ptr++; return LCB;
    case '}' : ptr++; return RCB;
    case '[' : ptr++; return LSB;
    case ']' : ptr++; return RSB;
    case ';' : ptr++; return SEMICOLON;
    case ',' : ptr++; return COMMA;
    case ':' : ptr++; return COLON;
    case '.' : ptr++; return DOT;
    case '*' : ptr++; return MUL;
    case '/' : ptr++; return DIV;
    case '%' : ptr++; return MOD;
    case '\'': ptr++; return SQUOTE;
    case '"' : ptr++; return DQUOTE;

    /* one- or two-character operators */
    case '>' : ptr++; return match_second('=', GEQ, GT);
    case '<' : ptr++; return match_second('=', LEQ, LT);
    case '=' : ptr++; return match_second('=', EQUEQU, EQ);
    case '!' : ptr++; return match_second('=', NEQ, UNDEF);
    case '+' : ptr++; return match_second('+', INCR, ADD);
    case '-' : ptr++; return match_second('-', DECR, SUB);

    default  : break;                        /* multi-character tokens below */
    }

    /* <ctype.h> functions require an unsigned char value. */
    if (isalpha((unsigned char)*ptr)) {
        while (isalnum((unsigned char)*ptr)) {
            if (i < sizeof token - 1) { token[i++] = *ptr; }
            ptr++;
        }
        token[i] = '\0';

        for (int j = 0; j < KEY_CTR; j++) {
            if (strcmp(keywords[j], token) == 0) {
                KEYtype = j;
                return KEY;
            }
        }
        return IDENTIFIER;
    }

    if (isdigit((unsigned char)*ptr)) {
        while (isdigit((unsigned char)*ptr)) {
            if (i < sizeof token - 1) { token[i++] = *ptr; }
            ptr++;
        }
        token[i] = '\0';
        return DIGIT;
    }

    ptr++;
    return UNDEF;
}

The different data Structures that can be used are:


a) Linked list.
b) Array of Structures

The appropriate fields are Name, Type, Value, etc.


Language Processors
Page 4
WEEK 5, 6 AND 7: Design of a Predictive Parser

To code and test parser:

 Students should write a formal grammar for the given C subset(Refer


Sample Grammar given above)

 Remove left recursion from each of the productions so that the underlying
grammar can be parsed with a predictive parser.

 The parser obtains a string of tokens from the lexical analyzer and verifies
that the string can be generated by the grammar for the C language.

 The parser should report syntax errors if any (for eg.: Misspelling an
identifier or keyword, Undeclared or Multiply declared identifier,
Arithmetic or Relational Expressions with unbalanced parentheses and
Expression syntax error etc.) with appropriate line-no.

For a given grammar:


1. Eliminate the Left Recursion
2. Left Factor the grammar
3. Write a function for each nonterminal of the grammar

Eg: Design a parser for the following grammar:

E -> E+T|T
T -> T*F | F
F -> (E)|id

After doing the modifications


The grammar becomes

E -> TE’
E’ -> +TE’|e
T -> FT’
T’->*FT’ |e
F -> (E)|id

Language Processors
Page 5
Write the Parser as follows:

Void E()
{
T();
EPRIME();
}

Void EPRIME( )
{
If(input-symbol ==”+”)
{
ADVANCE( );
T( );
EPRIME( );
}
}

Void T( )
{
F( );
TPRIME( );
}

Void TPRIME( )
{
if (input-symbol ==‘*’)
{
F( );
TPRIME( );
}
}

void F( )
{
if (input-symbol ==‘(‘)
{
ADVANCE( );
E( );
if (input-symbol==‘)’)
ADVANCE( );
Else if (input-symbol ==‘id’)
ADVANCE( );

Language Processors
Page 6
else ERROR( );
}
Parser Code for given C language grammar:
void declarations()
{
s=lex();
if(s==KEY && (KEYtype==INT||KEYtype==CHAR))
{ cprintf("_____________________________KEYWORD detected\r\n");
idList(KEYtype);
cprintf("End of a Declaration by SEMICOLON\r\n");
declarations();
}
else
{ ep_flag=YES;}//epsilon transition so no error also
}

int Missing_Error=NO;
/*
 * identifier-list: reads the next token, which should be an IDENTIFIER.
 *
 * datatype - type keyword of the enclosing declaration, passed on unchanged
 *            to idList_X().
 *
 * If the token is a COMMA or SEMICOLON the identifier is missing: ep_flag is
 * set and "Identifier Missing" is reported. Any other non-identifier token is
 * reported as an illegal character. In every case the token text is copied
 * to varname and parsing continues in idList_X(), so the rest of the
 * declaration is still scanned. Missing_Error is deliberately not raised
 * here (the assignment was disabled in the original listing).
 */
void idList(int datatype)
{
    s = lex();

    if (s != IDENTIFIER) {
        if (s == COMMA || s == SEMICOLON) {
            ep_flag = YES;
            strcpy(errtype, "Identifier Missing");
        } else {
            sprintf(errtype, "Illegal Character '%s'", token);
        }
        put_error(lineno);
    }

    /* no else here: keep scanning the remainder of the declaration */
    strcpy(varname, token);
    idList_X(datatype);
}

void idList_X(int datatype)


{
s=lex();
if (s==COMMA && Missing_Error==NO)
{ ADD_ST(varname,datatype,size);

idList(datatype);
}
else if(s==LSB)
{
s=lex(); //get number
if (s!=DIGIT)

Language Processors
Page 7
{ep_flag=YES;strcpy(errtype,"Missing Array size after '['");
put_error(lineno);}
else
{size=atoi(token);}

s=lex(); //get ']'


if(s !=RSB)
{ep_flag=YES;
strcpy(errtype,"Missing ']'");
put_error(lineno);
}

idList_X(datatype);
}
else if (s==SEMICOLON && Missing_Error==NO)
{

ADD_ST(varname,datatype,size);
}
else
{ep_flag=YES;
strcpy(errtype,"Missing ';'");
put_error(lineno);
}
Missing_Error=NO;
}

/*
 * Dispatches on the next token: '{' starts a compound statement handled by
 * comp_stat(); anything else sets ep_flag and is handed to simple_stat().
 */
void statement()
{
    s = lex();

    if (s != LCB) {
        ep_flag = YES;
        cprintf("*****SIMPLE_STATEMENT\r\n");
        simple_stat();
        return;
    }

    cprintf("***COMPOUND_STATEMENT\r\n");
    comp_stat();
}

/*
 * Body of a compound statement: parses the enclosed statement(s), then
 * expects the closing '}' and reports "Missing '}'" when it is absent.
 */
void comp_stat()
{
    statement();

    s = lex();                  /* closing '}' */
    if (s == RCB) {
        return;
    }

    ep_flag = YES;
    strcpy(errtype, "Missing '}'");
    put_error(lineno);
}
Language Processors
Page 8
/*
 * Parses one simple statement and then continues with statement():
 *   IDENTIFIER                -> assign_stat()
 *   KEY with IF               -> decesion_stat()
 *   KEY with WHILE or FOR     -> looping_stat()
 *   KEY with CONTINUE/BREAK   -> jump_stat()
 * Any other keyword is ignored; any other token only sets ep_flag.
 */
void simple_stat()
{
    s = lex();

    if (s == IDENTIFIER) {
        assign_stat();
        statement();
        return;
    }

    if (s != KEY) {
        ep_flag = YES;          /* because lex has already been called */
        return;
    }

    if (KEYtype == IF) {
        decesion_stat();
    } else if (KEYtype == WHILE || KEYtype == FOR) {
        looping_stat();
    } else if (KEYtype == CONTINUE || KEYtype == BREAK) {
        jump_stat();
    } else {
        return;                 /* keyword that starts no statement */
    }

    statement();
}

/*
 * assign_stat -> id = expn ;
 *
 * Entered with the identifier already in token. Expects '=', parses the
 * right-hand side with expn(), then expects ';'. A missing '=' or ';' is
 * reported through put_error() and parsing carries on.
 */
void assign_stat()
{
    cprintf("ASSIGNMENT STATEMENT for identifier: %s\r\n", token);

    char gotID[30];
    strcpy(gotID, token);       /* remember the identifier name */
    int IDtype = INT;           /* TODO: look the type up in the symbol table */

    if (IDtype == INT) {
        s = lex();
        if (s != EQ) {
            ep_flag = YES;
            strcpy(errtype, "Missing '='");
            put_error(lineno);
        }

        expn();

        s = lex();
        if (s != SEMICOLON) {
            ep_flag = YES;
            strcpy(errtype, "Missing ';'");
            put_error(lineno);
        }
    } else if (IDtype == CHAR) {
        /* character assignments are outside the exercise's scope */
    }
}

/*
 * Reads the next token and tells whether it is a relational operator
 * (==, !=, <, >, <=, >=).
 *
 * Returns YES when it is (expn() compares the result with YES); otherwise
 * sets ep_flag and returns NO.
 */
int relop()
{
    s = lex();

    if (s == EQUEQU || s == NEQ || s == LT || s == GT || s == LEQ || s == GEQ) {
        return YES;
    }

    ep_flag = YES;
    return NO;
}

/*
 * expn -> simple-expn [ relop simple-expn ]
 *
 * Sets single_arg_flag to YES, parses the left operand, saves fa into fa1,
 * and parses a right operand only when relop() reports an operator.
 */
void expn()
{
    single_arg_flag = YES;
    simple_expn();
    strcpy(fa1, fa);

    if (relop() != YES) {
        /* single argument, e.g. a=10; a=c; */
        return;
    }

    simple_expn();
}

/* simple-expn -> term seprime: one term followed by its additive tail. */
void simple_expn()
{
    term();             /* leading term */
    simple_expn_2();    /* zero or more "addop term" repetitions */
}

/*
 * seprime -> addop term seprime | epsilon
 *
 * On '+' or '-' clears single_arg_flag, parses the following term and
 * recurses; on any other token takes the epsilon alternative and sets
 * ep_flag.
 */
void simple_expn_2()
{
    s = lex();

    if (s == ADD || s == SUB) {
        single_arg_flag = NO;
        term();
        simple_expn_2();
    } else {
        ep_flag = YES;
    }
}

/* term -> factor tprime: one factor followed by its multiplicative tail. */
void term()
{
    factor();       /* leading factor */
    term_not();     /* zero or more "mulop factor" repetitions */
}

/*
 * tprime -> mulop factor tprime | epsilon
 *
 * On '*', '/' or '%' clears single_arg_flag, parses the following factor
 * and recurses; on any other token takes the epsilon alternative and sets
 * ep_flag.
 */
void term_not()
{
    s = lex();

    if (s == MUL || s == DIV || s == MOD) {
        single_arg_flag = NO;
        factor();
        term_not();
    } else {
        ep_flag = YES;
    }
}

/*
 * Looks for a name among the first ST_CTR entries of the symbol table ST.
 *
 * stext - NUL-terminated name to look for.
 *
 * Returns YES when some entry's id field equals stext, NO otherwise.
 */
int find_in_ST(char stext[])
{
    int idx = 0;

    while (idx < ST_CTR) {
        if (strcmp(stext, ST[idx].id) == 0) {
            return YES;
        }
        idx++;
    }

    return NO;      /* not found */
}

/*
 * factor -> id | num
 *
 * Reads one token. An identifier or number is copied to fa; an identifier
 * that find_in_ST() does not know is additionally reported as an undefined
 * symbol. Any other token empties fa, sets ep_flag and reports
 * "Missing ID or Number".
 */
void factor()
{
    s = lex();

    if (s != IDENTIFIER && s != DIGIT) {
        strcpy(fa, "");
        ep_flag = YES;
        strcpy(errtype, "Missing ID or Number");
        put_error(lineno);
        return;
    }

    if (s == IDENTIFIER && find_in_ST(token) == NO) {
        sprintf(errtype, "Undefined Symbol '%s'", token);
        put_error(lineno);
    }
    strcpy(fa, token);
}

/*
 * Program -> main ( ) { declarations statement-list }
 *
 * Reads the first source line from fin into line, points ptr at it and
 * parses the program skeleton. Each missing bracket is reported through
 * put_error() and parsing continues. Nothing is parsed when the first
 * token is not the keyword main, or when no line can be read.
 */
void parser()
{
    if (fgets(line, sizeof line, fin) == NULL) {
        return;                 /* empty or unreadable source */
    }
    ptr = line;
    lineno++;

    s = lex();
    if (s == KEY && KEYtype == MAIN) {
        cprintf("main() function is defined\r\n");

        s = lex();              /* '(' */
        if (s != LB) {
            ep_flag = YES;
            strcpy(errtype, "Missing '('");
            put_error(lineno);
        }

        s = lex();              /* ')' */
        if (s != RB) {
            ep_flag = YES;
            strcpy(errtype, "Missing ')'");
            put_error(lineno);
        }

        s = lex();              /* '{' */
        if (s != LCB) {
            ep_flag = YES;
            strcpy(errtype, "Missing main '{'");
            put_error(lineno);
        }

        cprintf("BEGINNING of DECLARATION REGION\r\n");

        declarations();
        statement();

        s = lex();              /* '}' */
        if (s != RCB) {
            /* the listing elided this body; mirrors the '{' check above */
            ep_flag = YES;
            strcpy(errtype, "Missing main '}'");
            put_error(lineno);
        }
    }
}

/*
 * Driver for the parser-only build: opens c:\source.cpp, parses it, then
 * displays the symbol table and the error table.
 *
 * Returns 1 when the input file cannot be opened, 0 otherwise.
 */
int main()
{
    if ((fin = fopen("c:\\source.cpp", "rt")) == NULL) {
        fprintf(stderr, "Cannot open input file.\n");
        getch();
        return 1;
    }

    parser();

    display_ST();
    display_ET();

    getch();                    /* wait for a key before clearing */
    clrscr();
    fclose(fin);

    getch();
    return 0;
}

Language Processors
Page 13
WEEK 8, 9 and 10: Design of Code generator

 The target code to be generated is 8086 assembly language program.

 Registers have to be selected for each of the variables used by the program.

 Code generator will take each line of the source program and generate its
equivalent assembly code.

Solution: Here we modify the parser and its associated functions so that, in addition to parsing, it also generates code
for the language constructs. The modified parser code is as follows:

/*
 * Writes the data-segment definition for varname to fout: a word (DW) when
 * datatype is INT, a byte (DB) otherwise, initialised to 0.
 */
static void emit_var_decl(int datatype)
{
    if (datatype == INT) {
        fprintf(fout, " %-10s DW 0\n", varname);
    } else {
        fprintf(fout, " %-10s DB 0\n", varname);
    }
}

/*
 * Continuation of an identifier list (code-generating version), entered
 * after an identifier has been copied to varname.
 *
 * datatype - type keyword of the enclosing declaration.
 *
 *   ','  : records varname in the symbol table, emits its data definition
 *          and parses the next identifier via idList().
 *   '['  : expects a number (stored in size) followed by ']', reporting an
 *          error for whichever is missing, then continues with idList_X().
 *   ';'  : emits the data definition, records varname and ends the
 *          declaration.
 *   else : reports "Missing ';'".
 *
 * Missing_Error is reset to NO on exit.
 * NOTE(review): the emitted definition is always a single DW/DB and does
 * not use size, so arrays appear to get one element only - confirm.
 */
void idList_X(int datatype)
{
    s = lex();

    if (s == COMMA && Missing_Error == NO) {
        ADD_ST(varname, datatype, size);
        emit_var_decl(datatype);
        idList(datatype);
    } else if (s == LSB) {
        s = lex();                          /* array size */
        if (s != DIGIT) {
            ep_flag = YES;
            strcpy(errtype, "Missing Array size after '['");
            put_error(lineno);
        } else {
            size = atoi(token);
        }

        s = lex();                          /* closing ']' */
        if (s != RSB) {
            ep_flag = YES;
            strcpy(errtype, "Missing ']'");
            put_error(lineno);
        }

        idList_X(datatype);
    } else if (s == SEMICOLON && Missing_Error == NO) {
        emit_var_decl(datatype);
        ADD_ST(varname, datatype, size);
    } else {
        ep_flag = YES;
        strcpy(errtype, "Missing ';'");
        put_error(lineno);
    }

    Missing_Error = NO;
}

/*
 * Dispatches on the next token: '{' starts a compound statement handled by
 * comp_stat(); anything else sets ep_flag and is handed to simple_stat().
 */
void statement()
{
    s = lex();

    if (s != LCB) {
        ep_flag = YES;
        cprintf("*****SIMPLE_STATEMENT\r\n");
        simple_stat();
        return;
    }

    cprintf("***COMPOUND_STATEMENT\r\n");
    comp_stat();
}

Language Processors
Page 15
/*
 * Body of a compound statement: parses the enclosed statement(s), then
 * expects the closing '}' and reports "Missing '}'" when it is absent.
 */
void comp_stat()
{
    statement();

    s = lex();                  /* closing '}' */
    if (s == RCB) {
        return;
    }

    ep_flag = YES;
    strcpy(errtype, "Missing '}'");
    put_error(lineno);
}

/*
 * Parses one simple statement and then continues with statement():
 *   IDENTIFIER                -> assign_stat()
 *   KEY with IF               -> decesion_stat()
 *   KEY with WHILE or FOR     -> looping_stat()
 *   KEY with CONTINUE/BREAK   -> jump_stat()
 * Any other keyword is ignored; any other token only sets ep_flag.
 */
void simple_stat()
{
    s = lex();

    if (s == IDENTIFIER) {
        assign_stat();
        statement();
        return;
    }

    if (s != KEY) {
        ep_flag = YES;          /* because lex has already been called */
        return;
    }

    if (KEYtype == IF) {
        decesion_stat();
    } else if (KEYtype == WHILE || KEYtype == FOR) {
        looping_stat();
    } else if (KEYtype == CONTINUE || KEYtype == BREAK) {
        jump_stat();
    } else {
        return;                 /* keyword that starts no statement */
    }

    statement();
}

Language Processors
Page 16
/*
 * assign_stat -> id = expn ;   (code-generating version)
 *
 * Entered with the identifier already in token. Expects '=', lets expn()
 * parse the right-hand side and emit its code, then writes
 * "MOV <id>,AX" to fout and expects ';'. A missing '=' or ';' is reported
 * through put_error() and parsing carries on.
 */
void assign_stat()
{
    cprintf("ASSIGNMENT STATEMENT for identifier: %s\r\n", token);

    char gotID[30];
    strcpy(gotID, token);       /* remember the target; token changes below */
    int IDtype = INT;           /* TODO: look the type up in the symbol table */

    if (IDtype == INT) {
        s = lex();
        if (s != EQ) {
            ep_flag = YES;
            strcpy(errtype, "Missing '='");
            put_error(lineno);
        }

        expn();
        fprintf(fout, " MOV %s,AX\n", gotID);

        s = lex();
        if (s != SEMICOLON) {
            ep_flag = YES;
            strcpy(errtype, "Missing ';'");
            put_error(lineno);
        }
    } else if (IDtype == CHAR) {
        /* character assignments are outside the exercise's scope */
    }
}

/*
 * Reads the next token and tells whether it is a relational operator
 * (==, !=, <, >, <=, >=).
 *
 * Returns YES when it is (expn() compares the result with YES); otherwise
 * sets ep_flag and returns NO.
 */
int relop()
{
    s = lex();

    if (s == EQUEQU || s == NEQ || s == LT || s == GT || s == LEQ || s == GEQ) {
        return YES;
    }

    ep_flag = YES;
    return NO;
}

Language Processors
Page 17
/*
 * expn -> simple-expn [ relop simple-expn ]   (code-generating version)
 *
 * Parses the left operand and saves fa into fa1. With a relational
 * operator, parses the right operand and loads fa1 into AX and fa into BX.
 * Without one, a lone operand (single_arg_flag still YES) is loaded into AX.
 */
void expn()
{
    single_arg_flag = YES;
    simple_expn();
    strcpy(fa1, fa);

    if (relop() == YES) {
        simple_expn();
        fprintf(fout, " MOV AX,%s\n", fa1);
        fprintf(fout, " MOV BX,%s\n", fa);
        return;
    }

    /* single argument, e.g. a=10; a=c; */
    if (single_arg_flag == YES) {
        fprintf(fout, " MOV AX,%s\n", fa);
    }
}

/* simple-expn -> term seprime: one term followed by its additive tail. */
void simple_expn()
{
    term();             /* leading term */
    simple_expn_2();    /* zero or more "addop term" repetitions */
}

/*
 * seprime -> addop term seprime | epsilon   (code-generating version)
 *
 * For '+' or '-': loads the left operand (fa) into AX and the right operand
 * into BX, then emits ADD or SUB. The result is also stored in temp and fa
 * is set to "temp", the same convention term_not() uses, so that a further
 * operator in a chain such as a+b+c works on the running result instead of
 * re-reading the previous right operand.
 * For any other token: epsilon alternative, sets ep_flag.
 */
void simple_expn_2()
{
    s = lex();

    if (s == ADD || s == SUB) {
        single_arg_flag = NO;

        /* fa and s are overwritten while the right operand is parsed, so
           the left-operand load and the opcode are formatted up front */
        sprintf(tempcode, " MOV AX,%s\n", fa);
        if (s == ADD) {
            sprintf(tempcode2, " ADD AX,BX");
        } else {
            sprintf(tempcode2, " SUB AX,BX");
        }

        term();

        fprintf(fout, "%s", tempcode);          /* MOV AX,<left>  */
        fprintf(fout, " MOV BX,%s\n", fa);      /* MOV BX,<right> */
        fprintf(fout, "%s\n", tempcode2);       /* ADD/SUB AX,BX  */
        fprintf(fout, " MOV temp,AX\n");        /* keep running result */
        strcpy(fa, "temp");

        simple_expn_2();
    } else {
        ep_flag = YES;
    }
}

Language Processors
Page 18
/* term -> factor tprime: one factor followed by its multiplicative tail. */
void term()
{
    factor();       /* leading factor */
    term_not();     /* zero or more "mulop factor" repetitions */
}

/*
 * tprime -> mulop factor tprime | epsilon   (code-generating version)
 *
 * For '*', '/' or '%': loads the left operand (fa) into AX and the right
 * operand into BX, emits MUL BX or DIV BX, stores the result in temp and
 * sets fa to "temp" for the rest of the expression.
 *
 * 8086 DIV with a 16-bit operand divides DX:AX by it, leaving the quotient
 * in AX and the remainder in DX. DX is therefore cleared before DIV, and
 * '%' stores DX rather than AX.
 * For any other token: epsilon alternative, sets ep_flag.
 */
void term_not()
{
    s = lex();

    if (s == MUL || s == DIV || s == MOD) {
        int op = s;             /* factor() overwrites s */

        single_arg_flag = NO;
        sprintf(tempcode3, " MOV AX,%s\n", fa);

        if (op == MUL) {
            sprintf(tempcode4, " MUL BX");
        } else {
            sprintf(tempcode4, " DIV BX");
        }

        factor();

        fprintf(fout, "%s", tempcode3);         /* MOV AX,<left>  */
        fprintf(fout, " MOV BX,%s\n", fa);      /* MOV BX,<right> */
        if (op != MUL) {
            fprintf(fout, " MOV DX,0\n");       /* high word of the dividend */
        }
        fprintf(fout, "%s\n", tempcode4);       /* MUL BX / DIV BX */

        if (op == MOD) {
            fprintf(fout, " MOV temp,DX\n");    /* remainder */
        } else {
            fprintf(fout, " MOV temp,AX\n");    /* product or quotient */
        }
        strcpy(fa, "temp");

        term_not();
    } else {
        ep_flag = YES;
    }
}
/*
 * Looks for a name among the first ST_CTR entries of the symbol table ST.
 *
 * stext - NUL-terminated name to look for.
 *
 * Returns YES when some entry's id field equals stext, NO otherwise.
 */
int find_in_ST(char stext[])
{
    int idx = 0;

    while (idx < ST_CTR) {
        if (strcmp(stext, ST[idx].id) == 0) {
            return YES;
        }
        idx++;
    }

    return NO;      /* not found */
}

void factor()
{
s=lex();

Language Processors
Page 19
if (s==IDENTIFIER || s==DIGIT)
{
if(s==IDENTIFIER && find_in_ST(token)==NO)
{
sprintf(errtype,"Undefined Symbol '%s'",token);
put_error(lineno);
}
strcpy(fa,token);
}
else
{ strcpy(fa,"");
ep_flag=YES;
sprintf(errtype,"Missing ID or Number");
put_error(lineno);
}
}

void parser()
{
fgets(line,80,fin); ptr=line; lineno++;

s=lex();
if (s==KEY && KEYtype==MAIN)
{
cprintf("main() function is defined\r\n");

s=lex();//(
if(s!=LB)
{
ep_flag=YES;
strcpy(errtype,"Missing '('");
put_error(lineno);
}
s=lex();//)
if(s!=RB)
{
ep_flag=YES;
strcpy(errtype,"Missing ')'");
put_error(lineno);
}

s=lex();//{
if(s!=LCB)
{
ep_flag=YES;
strcpy(errtype,"Missing main '{'");

Language Processors
Page 20
put_error(lineno);
}
cprintf("BEGINNING of DECLARATION REGION\r\n");
declarations();
fprintf(fout,"DATA ENDS\n\nCODE SEGMENT\n");
fprintf(fout,"ASSUME CS:CODE DS:DATA\n”);
fprintf(fout,”START: MOV AX,DATA\n MOV DS,AX\n\n");

statement();
s=lex();//}
if(s!=RCB){ …..}
}
}
/*
 * Driver for the code-generating build: opens c:\source.cpp for reading and
 * c:\code.asm for writing, emits the data-segment header, runs the parser,
 * appends the DOS terminate call and the segment trailer, then displays the
 * symbol table and the error table.
 *
 * Returns 1 when either file cannot be opened, 0 otherwise.
 */
int main()
{
    clrscr();

    if ((fin = fopen("c:\\source.cpp", "rt")) == NULL) {
        fprintf(stderr, "Cannot open input file.\n");
        getch();
        return 1;
    }

    if ((fout = fopen("c:\\code.asm", "wt")) == NULL) {
        fprintf(stderr, "Cannot open output file.\n");
        fclose(fin);            /* do not leave the input file open */
        getch();
        return 1;
    }

    fprintf(fout, "DATA SEGMENT\n");
    fprintf(fout, "temp DW 0\n");

    parser();

    fprintf(fout, "\n MOV AH,4CH\n INT 21H\n");
    fprintf(fout, "CODE ENDS\nEND START\n");

    display_ST();
    display_ET();

    getch();                    /* wait for a key before clearing */
    clrscr();
    fclose(fin);
    fclose(fout);

    getch();
    return 0;
}

Sample Lab Exam questions

1) Write the Lexical Analyzer to identify various tokens for a given language
(say Pascal)

2) Generate the parser for the following grammar


start REPEAT stmt UNTIL condition ;
stmt id= term arithop term
condition  term relop term
term  id | num
arithop  + | -
relop  < | > | = =

Sample input:- REPEAT count = count + increment UNTIL count = =10

3) Generate the 8086 code for the following

StartREPEAT statement UNTIL condition


Statementid:=expr;
Conditionid<num
Exprterm expr`
Expr`+term expr’ /ε
Term factor term`
Term’*factor term’ /ε
Factorid

Ex:- REPEAT
A:=B+C*D
UNTIL I <10

Language Processors
Page 22
References:

1. Compilers: Principles, Techniques, and Tools by Alfred V. Aho, Ravi Sethi, Jeffrey D.
Ullman
2. Principles of compiler Design by Alfred V. Aho and Jeffrey D. Ullman
3. Engineering a Compiler by Keith D. Cooper and Linda Torczon
4. Introduction to compiler construction by Thomas W. Parsons
5. Compiler Design in C by Holub A.I

Language Processors
Page 23

You might also like