Professional Documents
Culture Documents
SESSION :- 2019-2020
A Practical file
On
Compiler Design
[CS-7002]
From
Computer Science and Engineering
4th year(7th sem.)
SUBMITTED TO :- SUBMITTED BY :-
Jyant Vyas
0159CS161002
Page | 1
RGPM
Index
S.nO ObjectIveS p.nO. date SIgnature
Page | 2
RGPM
experIMent-1
DEVELOP A LEXICAL ANALYZER TO RECOGNIZE A
FEW PATTERNS.
ALGORITHM:
Step1: Start the program.
Step2: Declare all the variables and file pointers.
Step3: Display the input program.
Step4: Separate the keyword in the program and display it.
Step5: Display the header files of the input program
Step6: Separate the operators of the input program and display it.
Step7: Print the punctuation marks.
Step8: Print the constant that are present in input program.
Step9: Print the identifiers of the input program.
PROGRAM CODE:
#include<string.h>
#include<ctype.h>
#include<stdio.h>
#include<stdlib.h>
/* Classify a token: print whether str is one of the recognised C keywords
 * or an identifier.  Same contract as the original chain of strcmp()
 * calls, written table-driven instead. */
void keyword(char str[10])
{
    static const char *kw[] = {
        "for", "while", "do", "int", "float",
        "char", "double", "printf", "switch", "case"
    };
    int hit = 0;
    int t;

    for (t = 0; t < (int)(sizeof kw / sizeof kw[0]); t++) {
        if (strcmp(kw[t], str) == 0) {
            hit = 1;
            break;
        }
    }

    if (hit)
        printf("\n%s is a keyword", str);
    else
        printf("\n%s is an identifier", str);
}
/* Lexical analyzer driver (experiment 1).
 * Reads the file "input", splits it into numbers, identifiers/keywords
 * and special characters (the latter two buffered through the temp files
 * "identifier" and "specialchar"), then reports each category.
 * Returns 0 on success, 1 if "input" cannot be opened. */
int main(void)
{
    FILE *f1, *f2, *f3;
    int c;                      /* int, not char: getc() returns EOF as an int */
    char str[10];
    int num[100], lineno = 0, tokenvalue = 0, i = 0, j = 0, k = 0;

    f1 = fopen("input", "r");
    if (f1 == NULL) {           /* robustness: original dereferenced a NULL FILE* */
        printf("cannot open input file\n");
        return 1;
    }
    f2 = fopen("identifier", "w");
    f3 = fopen("specialchar", "w");

    while ((c = getc(f1)) != EOF) {
        if (isdigit(c)) {
            tokenvalue = c - '0';
            c = getc(f1);
            while (isdigit(c)) {
                /* BUG FIX: original had tokenvalue*=10+c-'0', i.e.
                 * tokenvalue = tokenvalue*(10+c-'0'), which computes a
                 * wrong value for every multi-digit number. */
                tokenvalue = tokenvalue * 10 + (c - '0');
                c = getc(f1);
            }
            num[i++] = tokenvalue;
            ungetc(c, f1);      /* push back the first non-digit */
        } else if (isalpha(c)) {
            /* identifier/keyword: copy to temp file, space-terminated */
            putc(c, f2);
            c = getc(f1);
            while (isdigit(c) || isalpha(c) || c == '_' || c == '$') {
                putc(c, f2);
                c = getc(f1);
            }
            putc(' ', f2);
            ungetc(c, f1);
        } else if (c == ' ' || c == '\t') {
            printf(" ");
        } else if (c == '\n') {
            lineno++;
        } else {
            putc(c, f3);        /* everything else is a special character */
        }
    }
    fclose(f2);
    fclose(f3);
    fclose(f1);

    printf("\n the no's in the program are:");
    for (j = 0; j < i; j++)
        printf("\t%d", num[j]);
    printf("\n");

    /* Re-read the buffered words and classify each one. */
    f2 = fopen("identifier", "r");
    k = 0;
    printf("the keywords and identifier are:");
    while ((c = getc(f2)) != EOF) {
        if (c != ' ') {
            if (k < (int)sizeof(str) - 1)   /* guard: original overflowed str[10] on long names */
                str[k++] = c;
        } else {
            str[k] = '\0';
            keyword(str);
            k = 0;
        }
    }
    fclose(f2);

    f3 = fopen("specialchar", "r");
    printf("\n Special Characters are");
    while ((c = getc(f3)) != EOF)
        printf("\t%c", c);
    printf("\n");
    fclose(f3);

    printf("Total no of lines are:%d", lineno);
    return 0;
}
OUTPUT:
RESULT:
Thus the program for developing a lexical analyzer to recognize a few patterns in C has been
executed successfully.
Page | 5
RGPM
experIMent-2
WRITE A PROGRAMME TO PARSE USING BRUTE
FORCE TECHNIQUE OF TOPDOWN PARSING.
#include<stdio.h>
#include<conio.h>
#include<iostream.h>
// Experiment 2 listing (Turbo-C++ era: <conio.h>, <iostream.h>, void main).
// NOTE(review): despite the experiment title ("brute-force top-down
// parsing"), this code actually reads a tree stored level-order in a[]
// and, for each position in the last half of the array, sums the values
// along its path back to the root, reporting the minimum path sum —
// confirm intent against the lab manual.
void main()
{
int a[30];           // level-order storage of the tree values
clrscr();
int min=10000,temp=0,i,lev,n,noofc,z;   // NOTE(review): lev is declared but never used
printf("please enter how many number");
cin>>n;              // total number of nodes
for(i=0;i<n;i++)
a[i]=0;
cout<<"enter value of root";
cin>>a[0];
for(i=1;i<=n/2;i++)
{
cout<<"please enter no of child of parent with value"<<a[i-1]<<":";
cin>>noofc;
for(int j=1;j<=noofc;j++)
{z=(i)*2+j-2;        // array slot of the j-th child of a[i-1]
cout<<"please enter value of child";
cin>>a[z];
}
}
for(i=n-1;i>=n/2;i--)   // positions assumed to be leaves
{
temp=0;
for(int j=i+1;j>=1;j=j/2)   // climb from node j (1-based) up to the root
temp=temp+a[j-1];
if(temp<min)
min=temp;
cout<<"temp min is"<<temp<<"\n";
}
cout<<"min is"<<min;
getch();
}
Page | 6
RGPM
experIMent-3
Operator Precedence Parsing Program in C - C Program to
Implement Operator Precedence Parsing
Parsing (syntax analysis) is a topic in compiler construction. Operator precedence parsing is one
of the parsing techniques for ambiguous grammars. It resolves the ambiguity by using operator
precedence. In this post we will see a C program which implements operator precedence parsing to
check the syntax of a given input string. In the input string (here a mathematical expression) the
identifiers are denoted by i. For example, a sample input string to the operator precedence parser is i*(i+i).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Global parser state and the operator-precedence relation table.
 * FIX: page-header lines from the document extraction were embedded
 * inside the prec[][] initializer, breaking compilation; the table is
 * re-stated with per-row braces, values unchanged. */
char *input;                /* the expression being parsed (ends with '$') */
int i = 0;                  /* cursor into input[] */
char lasthandle[6];         /* last handle reduced, for the trace output */
char stack[50];             /* parse stack, kept NUL-terminated */
char handles[][5] = { ")E(", "E*E", "E+E", "i", "E^E" };
/* (E) becomes )E( when pushed to stack */
int top = 0, l;

/* prec[stack symbol][input symbol]: '<' shift, '>' reduce, 'e' error.
 * Symbol order (see getindex): + - * / ^ i ( ) $            */
char prec[9][9] = {
/*  stack\input:  +    -    *    /    ^    i    (    )    $  */
/* + */        { '>', '>', '<', '<', '<', '<', '<', '>', '>' },
/* - */        { '>', '>', '<', '<', '<', '<', '<', '>', '>' },
/* * */        { '>', '>', '>', '>', '<', '<', '<', '>', '>' },
/* / */        { '>', '>', '>', '>', '<', '<', '<', '>', '>' },
/* ^ */        { '>', '>', '>', '>', '<', '<', '<', '>', '>' },
/* i */        { '>', '>', '>', '>', '>', 'e', 'e', '>', '>' },
/* ( */        { '<', '<', '<', '<', '<', '<', '<', '>', 'e' },
/* ) */        { '>', '>', '>', '>', '>', 'e', 'e', '>', '>' },
/* $ */        { '<', '<', '<', '<', '<', '<', '<', '<', '>' },
};
/* Map a grammar symbol to its row/column index in prec[][].
 * FIX: the original had no default case, so an unrecognised character
 * (including the '\0' read when main looks at input[i] past the end)
 * made the function fall off the end — undefined behaviour, and a
 * garbage index into prec.  Unknown symbols now map to 8, the '$'
 * end-marker column, which is the safe/meaningful interpretation. */
int getindex(char c)
{
    switch (c)
    {
    case '+': return 0;
    case '-': return 1;
    case '*': return 2;
    case '/': return 3;
    case '^': return 4;
    case 'i': return 5;
    case '(': return 6;
    case ')': return 7;
    case '$': return 8;
    default:  return 8;   /* treat anything else as end-of-input */
    }
}
/* Push the next input symbol onto the parse stack and keep the stack
 * NUL-terminated so it can be printed and strcmp'd as a string.
 * FIX: declared int but fell off the end without returning a value;
 * the return type is kept for interface compatibility (callers ignore
 * it) and an explicit return added. */
int shift()
{
    stack[++top] = *(input + i++);
    stack[top + 1] = '\0';
    return 0;
}
/* Try to reduce the top of the parse stack by one handle.
 * Handles are stored with '(' and ')' swapped (")E(") because the stack
 * is compared top-down against the handle left-to-right.  On success the
 * matched span is replaced by 'E', lasthandle records what was reduced,
 * and 1 is returned; 0 means no handle matched. */
int reduce()
{
    int h, pos, len;

    for (h = 0; h < 5; h++) {          /* try each handle in turn */
        len = strlen(handles[h]);

        /* quick rejects: top symbol must match, stack must be deep enough */
        if (stack[top] != handles[h][0] || top + 1 < len)
            continue;

        int match = 1;
        for (pos = 0; pos < len; pos++) {
            if (stack[top - pos] != handles[h][pos]) {
                match = 0;
                break;
            }
        }
        if (!match)
            continue;

        /* successful reduction: collapse the handle to a single 'E' */
        strcpy(lasthandle, handles[h]);
        top = top - len + 1;
        stack[top] = 'E';
        stack[top + 1] = '\0';
        return 1;
    }
    return 0;
}
/* Print the current parse stack contents (indices 0..top). */
void dispstack()
{
    int idx;
    for (idx = 0; idx <= top; idx++)
        putchar(stack[idx]);
}
/* Print the still-unconsumed part of the input string (from i to l). */
void dispinput()
{
    int idx;
    for (idx = i; idx < l; idx++)
        putchar(input[idx]);
}
void main()
{
int j;
input=(char*)malloc(50*sizeof(char));
printf("\nEnter the string\n");
scanf("%s",input);
input=strcat(input,"$");
l=strlen(input);
strcpy(stack,"$");
printf("\nSTACK\tINPUT\tACTION");
while(i<=l)
{
shift();
printf("\n");
dispstack();
printf("\t");
dispinput();
printf("\tShift");
if(prec[getindex(stack[top])][getindex(input[i])]=='>')
{
while(reduce())
{
printf("\n");
dispstack();
printf("\t");
dispinput();
Page | 9
RGPM
printf("\tReduced: E->%s",lasthandle);
}
}
}
if(strcmp(stack,"$E$")==0)
printf("\nAccepted;");
else
printf("\nNot Accepted;");
}
Output:-
Page | 10
RGPM
experIMent-4
Develop A Recursive Descent Parser.
#include<stdio.h>
#include<ctype.h>
#include<string.h>
/* Recursive-descent parser (experiment 4) for the grammar
 *      E  -> T E'
 *      E' -> + T E' | eps
 *      T  -> C T'
 *      T' -> * C T' | eps
 *      C  -> ( E ) | alnum
 * NOTE(review): the printed listing lost its braces and several
 * statements during document extraction; the bodies below reconstruct
 * the standard textbook form that the surviving fragments match —
 * verify against the original lab source. */
void Tprime();
void Eprime();
void E();
void check();
void T();

char expression[10];
int count = 0;      /* cursor into expression[] */
int flag = 0;       /* set to 1 on any syntax error */

int main()
{
    count = 0;
    flag = 0;
    /* width bound: expression[] holds at most 9 chars + NUL */
    scanf("%9s", expression);
    E();
    if (flag == 0 && expression[count] == '\0')
        printf("Valid expression\n");
    else
        printf("Invalid expression\n");
    return 0;
}

/* E -> T E' */
void E()
{
    T();
    Eprime();
}

/* T -> C T' */
void T()
{
    check();
    Tprime();
}

/* T' -> * C T' | eps */
void Tprime()
{
    if (expression[count] == '*') {
        count++;
        check();
        Tprime();
    }
}

/* C -> ( E ) | alnum ; flags an error on anything else */
void check()
{
    if (isalnum(expression[count])) {
        count++;
    } else if (expression[count] == '(') {
        count++;
        E();
        if (expression[count] == ')')
            count++;
        else
            flag = 1;
    } else {
        flag = 1;
    }
}

/* E' -> + T E' | eps */
void Eprime()
{
    if (expression[count] == '+') {
        count++;
        T();
        Eprime();
    }
}
Output:-
Page | 14
RGPM
experIMent-5
Write a program for generating for various
intermediate code forms.
i) Three address code ii) Polish notation.
such as:
The term three-address code is still used even if some instructions use more or fewer than
two operands. The key features of three-address code are that every instruction implements
exactly one fundamental operation, and that the source and destination may refer to any
available register.
Example
int main(void)
int i;
int b[10];
b[i] = i*i;
The preceding C program, translated into three-address code, might look something like the
following:
Page | 15
RGPM
i := 0 ; assignment
t0 := i*i
goto L1
L2:
Polish Notation
Infix form
Prefix form
Postfix form
Infix form
Is exactly the fully parenthesized notation we have just introduced. Let me remind you
once again the Recursive definition
Examples
(3 * 7)
((1 + 3) * 2)
((1 + 3) * ( 2 - 3))
Question: what if we do not put all the parentheses? Then there are ambiguities on how to
Page | 16
RGPM
interpret an expression: is 1+2*3 the same as (1+2)*3 or the same as 1+(2*3)? The
precedence of operators solves this problem.
Prefix form
Recursive definition of classic version, without parentheses (we do not need them, because
there is no longer any ambiguity on how to match the operands to the operators):
Examples
(* 3 7) or simply * 3 7
(* ( + 1 3) 2) or simply * + 1 3 2
( * ( + 1 3) ( - 2 3)) or simply * + 1 3 - 2 3
Postfix form
Main Feature: the operator is after the two operands. Recursive definition
Recursive definition of classic version, without parentheses (we do not need them, because
there is no longer any ambiguity on how to match the operands to the operators):
Examples
Page | 17
RGPM
(3 7 *) or simply 3 7 *
((1 3 + ) 2 *) or simply 1 3 + 2 *
((1 3 +) ( 2 3 -) * ) or simply 1 3 + 2 3 - *
In class: do several examples of the same expression, in each of the three Polish forms.
Page | 18
RGPM
experIMent-6
GENERATE LEXICAL ANALYZER USING LEX
ALGORITHM:
Step1: Lex program contains three sections: definitions, rules, and user subroutines. Each
section must be separated from the others by a line containing only the delimiter,
%%. The format is as follows: definitions %% rules %% user_subroutines
Step2: In definition section, the variables make up the left column, and their definitions make
up the right column. Any C statements should be enclosed in %{..}%. Identifier is defined such
that the first letter of an identifier is alphabet and remaining letters are alphanumeric.
Step3: In rules section, the left column contains the pattern to be recognized in an input file
to yylex(). The right column contains the C program fragment executed when that pattern is
recognized. The various patterns are keywords, operators, new line character, number, string,
identifier, beginning and end of block, comment statements, preprocessor directive
statements etc.
Step4: Each pattern may have a corresponding action, that is, a fragment of C source code to
execute when the pattern is matched.
Step5: When yylex() matches a string in the input stream, it copies the matched text to an
external character array, yytext, before it executes any actions in the rules section.
Step6: In user subroutine section, main routine calls yylex(). yywrap() is used to get more
input.
Step7: The lex command uses the rules and actions contained in file to generate a program,
lex.yy.c, which can be compiled with the cc command. That program can then receive input,
break the input into the logical pieces defined by the rules in file, and run program fragments
contained in the actions in file.
PROGRAM CODE:
%{
/* Lex specification (experiment 6): tokenises a C source file ("var.c")
 * and reports preprocessor directives, keywords, identifiers, numbers,
 * strings, operators and block delimiters.
 * NOTE(review): inter-word spaces were lost during document extraction;
 * spaces in code and in the printf message strings have been restored.
 * The transposed "COMMENT=0; / int" lines are rejoined below.
 * NOTE(review): COMMENT is set on seeing "/*" but never cleared (there
 * is no closing-comment rule) — present in the original as well. */
#include <stdio.h>
#include <stdlib.h>
int COMMENT = 0;    /* nonzero once a comment has started */
%}
identifier [a-zA-Z][a-zA-Z0-9]*
%%
#.*             { printf("\n%s is a preprocessor directive", yytext); }
int      |
float    |
char     |
double   |
while    |
for      |
struct   |
typedef  |
do       |
if       |
break    |
continue |
void     |
switch   |
return   |
else     |
goto            { printf("\n\t%s is a keyword", yytext); }
"/*"            { COMMENT = 1; printf("\n\t%s is a COMMENT", yytext); }
{identifier}\(  { if (!COMMENT) printf("\nFUNCTION\n\t%s", yytext); }
\{              { if (!COMMENT) printf("\nBLOCK BEGINS"); }
\}              { if (!COMMENT) printf("BLOCK ENDS"); }
{identifier}(\[[0-9]*\])? { if (!COMMENT) printf("\n%s IDENTIFIER", yytext); }
\".*\"          { if (!COMMENT) printf("\n\t%s is a STRING", yytext); }
[0-9]+          { if (!COMMENT) printf("\n%s is a NUMBER", yytext); }
\)(\:)?         { if (!COMMENT) printf("\n\t"); ECHO; printf("\n"); }
\(              ECHO;
=               { if (!COMMENT) printf("\n\t%s is an ASSIGNMENT OPERATOR", yytext); }
\<= |
\>= |
\<  |
==  |
\>              { if (!COMMENT) printf("\n\t%s is a RELATIONAL OPERATOR", yytext); }
%%
int main(int argc, char **argv)
{
    FILE *file;

    file = fopen("var.c", "r");
    if (!file) {
        printf("could not open the file");
        exit(0);
    }
    yyin = file;
    yylex();
    printf("\n");
    return 0;
}
int yywrap()
{
    return 1;
}
INPUT:
//var.c
#include<stdio.h>
#include<conio.h>
void main()
{
int a,b,c;
a=1;
b=2;
c=a+b;
printf("Sum:%d",c);
}
OUTPUT:
RESULT:
Thus the program for implementation of Lexical Analyzer using Lex tool has been executed
successfully.
Page | 21
RGPM
experIMent-7
GIVE ANY INTERMEDIATE CODE FORM
IMPLEMENT CODE OPTIMIZATION TECHNIQUES.
ALGORITHM:
Optimization is the process of transforming a piece of code to make it more efficient (either in
terms of time or space) without changing its output or side-effects. The only difference
visible to the code's user should be that it runs faster and/or consumes less memory.
It is really a misnomer that the name implies you are finding an "optimal"
solution — in truth, optimization aims to improve, not perfect, the
result. Optimization is the field where most compiler research is done today. The tasks of the
front-end (scanning, parsing, semantic analysis) are well understood
and unoptimized code generation is relatively straightforward. Optimization, on the
other hand, still retains a sizable measure of mysticism. High-quality optimization is more of
an art than a science. Compilers for mature languages aren't judged by how well they parse
or analyze the code — you just expect it to do it right with a minimum of hassle —
but instead by the quality of the object code they produce. Many optimization problems are
NP-complete and thus most optimization algorithms rely on heuristics and
approximations. It may be possible to come up with a case where a particular
algorithm fails to produce better code or perhaps even makes it worse.
However, the algorithms tend to do rather well overall. It's worth reiterating here that
efficient code starts with intelligent decisions by the programmer.
Types of optimizations
Techniques used in optimization can be broken up among various scopes which can affect
anything from a single statement to the entire program. Generally speaking, locally scoped
techniques are easier to implement than global ones but result in smaller gains. Some
examples of scopes include:
Page | 22
RGPM
• Loop optimizations: These act on the statements which make up a loop, such as a for loop
(e.g., loop-invariant code motion). Loop optimizations can have a significant impact because
many programs spend a large percentage of their time inside loops. In addition to scoped
optimizations there are two further general categories of optimization:

"immediate operand register". (A potential problem with this is that XOR may introduce a
data dependency on the previous value of the register, causing a pipeline stall. However,
processors often have XOR of a register with itself as a special case that doesn't cause stalls.)
Factors affecting optimization
The machine itself
Many of the choices about which optimizations can and should be done depend on the
characteristics of the target machine. It is sometimes possible to parameterize some of these
machine-dependent factors, so that a single piece of compiler code can be
Page | 23
RGPM
• Number of CPU registers: To a certain extent, the more registers, the easier it is to
optimize for performance. Local variables can be allocated in the registers and not on
the stack. Temporary/intermediate results can be left in registers without writing to and
reading back from memory.
• RISCvs.CISC: CISC instruction sets often have variable instruction lengths,often have a
larger number of possible instructions that can be used, andeach instruction could take
differing amounts of time. RISC instruction setsattempt to limit the variability in each of
these: instruction sets are usuallyconstant length, with few exceptions, there are usually
fewer combinations of registers and memory operations, and the instruction issue rate (the
number of instructions completed per time period, usually an integer multiple of theclock
cycle) is usually constant in cases where memory latency is not a factor.There may be
several ways of carrying out a certain task, with CISC usuallyoffering more alternatives than
RISC. Compilers have to know the relativecosts among the various instructions and choose
the best instructionsequence (seeinstruction selection).
• Pipelines: A pipeline is essentially a CPU broken up into an assembly line. Itallows use of
parts of the CPU for different instructions by breaking up theexecution of instructions into
various stages: instruction decode, addressdecode, memory fetch, register fetch, compute,
register store, etc. Oneinstruction could be in the register store stage, while another could
be in theregister fetch stage. Pipeline conflicts occur when an instruction in one stageof the
pipeline depends on the result of another instruction ahead of it in thepipeline but not yet
completed. Pipeline conflicts can lead topipeline stalls:where the CPU wastes cycles waiting
for a conflict to resolve. Compilers canschedule, or reorder, instructions so that pipeline
stalls occur less frequently.
• Number of functional units: Some CPUs have several ALUs andFPUs. Thisallows them to
execute multiple instructions simultaneously. There may berestrictions on which
instructions can pair with which other instructions("pairing" is the simultaneous execution
of two or more instructions), andwhich functional unit can execute which instruction. They
also have issuessimilar to pipeline conflicts. Here again, instructions have to be scheduled so
that the various functionalunits are fully fed with instructions to execute.
• Cache/Memory transfer rates: These give the compiler an indication of thepenalty for
cache misses. This is used mainly in specialized applications.
Page | 24
RGPM
"as fast as possible," so code caching might be disabled, along withcompiler optimizations
that require it.
Page | 25
RGPM
experIMent-8
STUDY OF AN OBJECT ORIENTED COMPILER.
THEORY:
Object orientation was first introduced in Simula (in 1967), and has been
incorporated in languages such as Smalltalk, C++, C#, and Java.
We have gained this experience in several projects and used it to great advantage
in two courses on compiler construction with Objective-C and Java. It turns out that
OOP can be applied productively in every phase of a compiler implementation, and it
delivers the expected benefits, such as:
Objects enforce information hiding and state encapsulation,
Methods help to develop by divide and conquer technique.
All work is carried out by messages , which can be debugged by instrumenting their
methods.
Most importantly, classes encourage code reuse between projects.
Inheritance allows code reuse within a project and modifications from one project to
another.
Modern class libraries contain many pre-fabricated algorithms and data structures.
Page | 26