You are on page 1of 20

Assignment on Lexical Analyzer

1. Write a program in LEX to count the number of consonants and vowels in a given C or C++
source program.
%{
/*
 * Counts the vowels and consonants found on standard input.
 *
 * A vowel is any of a, e, i, o, u in either case; every other ASCII letter
 * is counted as a consonant.  All remaining characters are discarded.
 */
#include <stdio.h>

int vow_count = 0;     /* letters matched by [aeiouAEIOU] */
int const_count = 0;   /* letters matched by [a-zA-Z] that are not vowels */
%}

%%
[aeiouAEIOU]    { vow_count++; }
[a-zA-Z]        { const_count++; /* vowels were taken by the rule above */ }
.|\n            { /* not a letter: swallow it instead of echoing it */ }
%%

/* Called by the scanner at end of input; 1 means there is nothing more to read. */
int yywrap(void)
{
    return 1;
}

/* Scans standard input until EOF, then prints both totals. */
int main(void)
{
    printf("Enter the string of vowels and consonents:");
    yylex();
    printf("The number of vowels are: %d\n", vow_count);
    printf("The number of consonants are: %d\n", const_count);
    return 0;
}

2. Write a program in LEX to count the number of: (i) positive and negative integers
(ii) positive and negative fractions, in C and C++ source programs.

(i) Positive and negative integers


%{
/*
 * Counts the positive and negative integer literals found on standard input
 * and prints each one as it is recognised.
 *
 * An unsigned or '+'-prefixed digit string is positive; a '-'-prefixed digit
 * string is negative.  Identifiers and fractional numbers are skipped as a
 * whole so that their digits are not mistaken for integers.
 */
#include <stdio.h>

int p_count = 0;   /* positive integers seen so far */
int n_count = 0;   /* negative integers seen so far */
%}
%%
[a-zA-Z_][a-zA-Z0-9_]*  { /* identifier or keyword: digits inside it are not numbers */ }
[+-]?[0-9]*\.[0-9]+     { /* fraction: not an integer, skip it whole */ }
[+]?[0-9]+              { p_count++; printf("Positive Integer:%s\n", yytext); }
[-][0-9]+               { n_count++; printf("negative Integer:%s\n", yytext); }
.|\n                    { /* anything else is discarded rather than echoed */ }
%%

/* Called by the scanner at end of input; 1 means there is nothing more to read. */
int yywrap(void)
{
    return 1;
}

/* Scans standard input until EOF, then prints both totals. */
int main(int argc, char *argv[])
{
    (void) argc;   /* command-line arguments are accepted but not used */
    (void) argv;
    yylex();
    printf("the total number of Postive Integer are: %d\n", p_count);
    printf("the total number of Negative Integer are:%d\n", n_count);
    return 0;
}

(ii) positive and negative fractions


%{
/*
 * Counts four kinds of numeric literal on standard input: positive and
 * negative integers, and positive and negative fractions (numbers with a
 * decimal point followed by at least one digit).
 *
 * The scanner always takes the longest match, so "12.5" is counted once as
 * a fraction and not as the integer "12" followed by ".5".
 */
#include <stdio.h>

int postiveno = 0;           /* unsigned or '+'-prefixed integers */
int negtiveno = 0;           /* '-'-prefixed integers */
int positivefractions = 0;   /* unsigned or '+'-prefixed fractions */
int negativefractions = 0;   /* '-'-prefixed fractions */
%}

DIGIT [0-9]
%%

\+?{DIGIT}+             { postiveno++; }
-{DIGIT}+               { negtiveno++; }

\+?{DIGIT}*\.{DIGIT}+   { positivefractions++; }
-{DIGIT}*\.{DIGIT}+     { negativefractions++; }
.|\n                    { /* everything else, newlines included, is ignored */ }
%%

/* Called by the scanner at end of input; 1 means there is nothing more to read. */
int yywrap(void)
{
    return 1;
}

/* Scans standard input until EOF, then prints the four totals. */
int main(void)
{
    yylex();
    printf("\nNo. of positive numbers: %d", postiveno);
    printf("\nNo. of Negative numbers: %d", negtiveno);
    printf("\nNo. of Positive numbers in fractions: %d", positivefractions);
    printf("\nNo. of Negative numbers in fractions: %d\n",
           negativefractions);
    return 0;
}
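3. Write a LEX program to recognize valid C and C++ programs. (The answer below handles a simpler case: it checks whether an arithmetic expression is well formed.)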

%{
/*
 * Checks whether an arithmetic expression read from standard input is well
 * formed.
 *
 * Two things are tracked while scanning:
 *   - a stack of open brackets, so every ')', '}' and ']' must close the
 *     most recently opened bracket of the same kind;
 *   - the number of operands and operators seen since the last bracket was
 *     closed; whenever operands are present there must be exactly one more
 *     operand than operators.
 *
 * The result is reported by main() once the whole input has been read.
 */
#include <stdio.h>
#include <string.h>

#define MAX_DEPTH   100   /* capacity of the bracket stack */
#define MAX_LEXEMES 10    /* operands / operators remembered */
#define LEXEME_LEN  10    /* bytes per remembered lexeme, including the NUL */

int operators_count = 0, operands_count = 0, valid = 1, top = -1, l = 0, j = 0;
char operands[MAX_LEXEMES][LEXEME_LEN], operators[MAX_LEXEMES][LEXEME_LEN], stack[MAX_DEPTH];

/* Pushes an opening bracket; nesting deeper than the stack can hold is rejected. */
static void push_bracket(char opener)
{
    if (top + 1 >= MAX_DEPTH) {
        valid = 0;
        return;
    }
    stack[++top] = opener;
}

/* Returns the opening bracket that pairs with `closer`, or '\0' if there is none. */
static char opener_for(char closer)
{
    switch (closer) {
    case ')': return '(';
    case '}': return '{';
    case ']': return '[';
    default:  return '\0';
    }
}

/*
 * Handles a closing bracket.  The expression becomes invalid when no bracket
 * is open, when the open bracket is of a different kind, or when the operands
 * and operators collected inside the brackets do not balance.  On success the
 * bracketed group is treated as a single operand of the enclosing expression.
 */
static void pop_bracket(char closer)
{
    if (top < 0 || stack[top] != opener_for(closer)) {
        valid = 0;
    } else if (operands_count > 0 && (operands_count - operators_count) != 1) {
        valid = 0;
    } else {
        top--;
        operands_count = 1;
        operators_count = 0;
    }
}

/*
 * Stores `text` in the next free slot of `table` (truncating it to fit) and
 * advances `*next`.  Once the table is full the text is dropped, but the
 * counter still advances.
 */
static void record_lexeme(char table[][LEXEME_LEN], int *next, const char *text)
{
    if (*next < MAX_LEXEMES)
        snprintf(table[*next], LEXEME_LEN, "%s", text);
    (*next)++;
}
%}
%%
"("|"{"|"["     { push_bracket(yytext[0]); }
")"|"}"|"]"     { pop_bracket(yytext[0]); }
"+"|"-"|"*"|"/" {
                    operators_count++;
                    record_lexeme(operators, &l, yytext);
                }
[0-9]+|[a-zA-Z][a-zA-Z0-9_]* {
                    operands_count++;
                    record_lexeme(operands, &j, yytext);
                }
.|\n            { /* blanks and other characters take no part in the check */ }
%%

/* Called by the scanner at end of input; 1 means there is nothing more to read. */
int yywrap()
{
    return 1;
}

/* Reads the expression from standard input and prints the verdict. */
int main()
{
    printf("Enter the arithmetic expression: ");
    yylex();

    /* Apply the operand/operator balance rule to the outermost level as
       well, so that input such as "1+" is not accepted. */
    if (operands_count > 0 && (operands_count - operators_count) != 1)
        valid = 0;

    if (valid == 1 && top == -1) {
        printf("\nValid Expression\n");
    }
    else
        printf("\nInvalid Expression\n");

    return 0;
}

Design a scanner for MINI language


 In designing a scanner for this language:
1. Start with regular expressions
2. Identify Tokens...
3. Develop and simulate NFA
4. Construct and simulate DFA
5. Write a lex program, to recognize tokens in MINI language:
• Input: Tiny language
• Output: Tokens..,

Answer for Design scanner

A Sample TINY Program


Calculating the factorial of an input number x
{ Sample program in TINY language – Factorial }
write "Enter an integer value: ";
read x;
factorial := 1;
count := x;
while count > 1 do
factorial := factorial * count;
count := count - 1;
end;
write "factorial of ", x, " = ", factorial;
Assignment on Syntax analyzer
3. Write Calculator program
Expand the calculator program so that the new calculator program is
capable of processing:
user: 3 * (4 + 5)

user: x = 3 * (4 + 5)

user: y = 5

user: x + 2*y

user: 2^3/6

user: sin(1) + cos(PI)

The calculator should also support tan, log and factorial.

Answer for Calculator program


%{
/*
 * Grammar for a line-oriented floating-point calculator.
 *
 * Every input line holds one expression.  Its value is printed with the
 * current output format (see FIX / SCI / ENG below) and remembered in `ans`.
 * Numbered registers reg[0] .. reg[REG_COUNT-1] hold values across lines.
 * All semantic values are doubles; the bitwise and logical operators
 * truncate their operands to int first.
 */
#include <alloca.h>
#include <ctype.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define YYSTYPE double
#define REG_COUNT 100          /* registers are printed as two digits: 00..99 */

double calcfact();
/* NOTE(review): constval() is called in `primary` but is neither declared nor
   defined in this file; confirm its return type and add a declaration. */

int yylex(void);
int yyerror(const char *s);
int warning(const char *s, const char *t);

double reg[REG_COUNT];         /* user registers, all initially 0 */
double ans;                    /* value of the last expression printed */
char format[20];               /* printf format used to print results */
char *progname;                /* argv[0], used as prefix of diagnostics */

/*
 * Converts a register number taken from the input into an index into reg[].
 * Returns -1, after printing a diagnostic, when the number is out of range.
 */
static int reg_slot(double number)
{
    if (!(number >= 0 && number < REG_COUNT)) {   /* also rejects NaN */
        warning("register number out of range", NULL);
        return -1;
    }
    return (int) number;
}

/*
 * Rebuilds the output format for conversion `conv` ('f', 'g' or 'e').
 * A negative precision means "use printf's default precision".
 */
static void set_output_format(char conv, int precision)
{
    if (precision < 0)
        snprintf(format, sizeof format, "%%%c\n", conv);
    else
        snprintf(format, sizeof format, "%%.%d%c\n", precision, conv);
}
%}

%token NUMBER SPACE MOD RIGHTSHIFT LEFTSHIFT SEMICOLON SIN EOLN PIVAL
%token PLUS MINUS DIV MUL POW OPENBRACKET CLOSEBRACKET UNARYMINUS
%token COS TAN ASIN ACOS ATAN FACT INC DEC LAND OR COMPLEMENT
%token NOT XOR ASSIGN IOR AND OPENREG CLOSEREG REGA ANS FIX SCI ENG
%token CONST
/* Returned by the companion scanner; no grammar rule uses them yet. */
%token IDENT BINTODEC DECTOBIN
%left PLUS MINUS
%left MUL DIV
%left UNARYMINUS
%left LAND OR XOR NOT AND IOR
%left LOG

%%

list:     /* empty */
        | list EOLN
        | list expr EOLN
                { printf( format , (double) $2 ); ans=$2; }
        | list error EOLN
                { yyerrok; /* drop the faulty line and carry on with the next */ }
        ;
expr:     conditional_expr
        ;
conditional_expr: logical_or_expr
        ;
logical_or_expr: logical_and_expr
        | logical_or_expr OR logical_and_expr
                { $$ = (int) $1 || (int) $3; }
        ;
logical_and_expr: inclusive_or_expr
        | logical_and_expr LAND inclusive_or_expr
                { $$ = (int) $1 && (int) $3; }
        ;
inclusive_or_expr: exclusive_or_expr
        | inclusive_or_expr IOR exclusive_or_expr
                { $$ = (int) $1 | (int) $3; }
        ;
exclusive_or_expr: and_expr
        | exclusive_or_expr XOR and_expr
                { $$ = (int) $1 ^ (int) $3; }
        ;
and_expr: shift_expr
        | and_expr AND shift_expr
                { $$ = (int) $1 & (int) $3; }
        ;
shift_expr: pow_expr
        | shift_expr LEFTSHIFT pow_expr
                { $$ = (int) $1 << (int) $3; }
        | shift_expr RIGHTSHIFT pow_expr
                { $$ = (int) $1 >> (int) $3; }
        ;
pow_expr: add_expr
        | pow_expr POW add_expr         { $$ = pow($1,$3); }
        ;
add_expr: mul_expr
        | add_expr PLUS mul_expr        { $$ = $1 + $3; }
        | add_expr MINUS mul_expr       { $$ = $1 - $3; }
        ;
mul_expr: unary_expr
        | mul_expr MUL unary_expr       { $$ = $1 * $3; }
        | mul_expr DIV unary_expr       { $$ = $1 / $3; }
        | mul_expr MOD unary_expr       { $$ = fmod($1,$3); }
        ;
unary_expr: assign_expr
        | MINUS primary %prec UNARYMINUS { $$ = -$2; }
        | INC unary_expr                { $$ = $2+1; }
        | DEC unary_expr                { $$ = $2-1; }
        | NOT unary_expr                { $$ = !$2; }
        | LOG unary_expr                { $$ = log($2); }
        ;
assign_expr: postfix_expr
        | REGA OPENREG expr CLOSEREG ASSIGN postfix_expr
                { /* reg[n] = value; the assignment itself yields the value */
                  int slot = reg_slot($3);
                  if (slot >= 0)
                      reg[slot] = $6;
                  $$ = $6;
                }
        | REGA OPENREG expr CLOSEREG
                { /* reg[n]; an invalid register number reads as 0 */
                  int slot = reg_slot($3);
                  $$ = (slot >= 0) ? reg[slot] : 0;
                }
        | REGA
                { /* bare "reg": list every non-zero register */
                  int i;
                  for (i = 0; i < REG_COUNT; i++)
                      if (reg[i] != 0)
                          printf("%02d = %.2f\n", i, reg[i]);
                  $$ = 0;
                }
        ;
postfix_expr: primary
        | postfix_expr INC              { $$ = $1+1; }
        | postfix_expr DEC              { $$ = $1-1; }
        | postfix_expr FACT
                { $$ = calcfact((unsigned long int)$1); }
        ;
primary:  NUMBER                        { $$ = $1; }
        | PIVAL                         { $$ = M_PI; }
        | OPENBRACKET expr CLOSEBRACKET { $$ = $2; }
        | ANS                           { $$ = ans; }
        | CONST OPENBRACKET expr CLOSEBRACKET { $$ = constval($3); }
        | set_format
        ;
set_format: function_call
        | FIX OPENBRACKET expr CLOSEBRACKET
                { set_output_format('f', (int) $3); $$ = 0; }
        | FIX
                { set_output_format('f', -1); $$ = 0; }
        | SCI OPENBRACKET expr CLOSEBRACKET
                { set_output_format('g', (int) $3); $$ = 0; }
        | SCI
                { set_output_format('g', -1); $$ = 0; }
        | ENG OPENBRACKET expr CLOSEBRACKET
                { set_output_format('e', (int) $3); $$ = 0; }
        | ENG
                { set_output_format('e', -1); $$ = 0; }
        ;
function_call: SIN OPENBRACKET expr CLOSEBRACKET
                { $$ = sin($3); }
        | COS OPENBRACKET expr CLOSEBRACKET
                { $$ = cos($3); }
        | TAN OPENBRACKET expr CLOSEBRACKET
                { $$ = tan($3); }
        | ASIN OPENBRACKET expr CLOSEBRACKET
                { $$ = asin($3); }
        | ACOS OPENBRACKET expr CLOSEBRACKET
                { $$ = acos($3); }
        | ATAN OPENBRACKET expr CLOSEBRACKET
                { $$ = atan($3); }
        ;

%%

/* Semantic value of the current token; the scanner refers to it as
   `extern double yylval`. */
double yylval;

/*
 * Program entry point: remembers the program name for diagnostics, selects
 * "%g" as the initial output format and runs the parser until end of input.
 * Returns the parser's status.
 */
int main(int argc, char *argv[])
{
    progname = (argc > 0 && argv[0] != NULL) ? argv[0] : "calc";
    strcpy(format, "%g\n");
    return yyparse();
}

/*
 * Called by the parser when it detects a syntax error.  It only reports the
 * problem; the `list error EOLN` rule resynchronises at the end of the line,
 * so the parser is never re-entered from here.
 */
int yyerror(const char *s)
{
    warning(s, NULL);
    return 0;
}

/*
 * Prints "<progname>: <s>" on stderr, followed by " <t>" on a line of its
 * own when t is not NULL.
 */
int warning(const char *s, const char *t)
{
    fprintf(stderr, "%s: %s\n", progname, s);
    if (t)
        fprintf(stderr, " %s\n", t);
    return 0;
}

Lex file for an advanced calculator


%{
/*
 * Scanner for the advanced calculator.
 *
 * Turns keywords, operators and numbers into the token codes defined in
 * calcparse.tab.h.  The value of a NUMBER token is passed to the parser in
 * yylval.  IDENT, BINTODEC and DECTOBIN must be declared by the grammar
 * that generates calcparse.tab.h.
 */
#define YYSTYPE double
#include "calcparse.tab.h"
#include <math.h>
#include <stdio.h>
extern double yylval;
%}
D   [0-9]
%%
[ \t]+      { /* blanks and tabs only separate tokens */ }
log         return LOG;
pi|PI       return PIVAL;
sin         return SIN;
cos         return COS;
tan         return TAN;
asin        return ASIN;
acos        return ACOS;
atan        return ATAN;
and         return AND;
not         return NOT;
xor         return XOR;
or          return OR;
reg         return REGA;
ans         return ANS;
fix         return FIX;
sci         return SCI;
eng         return ENG;
const       return CONST;
bintodec    return BINTODEC;
dectobin    return DECTOBIN;
{D}+(\.{D}*)?|\.{D}+ { sscanf( yytext, "%lf", &yylval ); return NUMBER ; }
[a-zA-Z_]+  return IDENT;
"["         return OPENREG;
"]"         return CLOSEREG;
"<<"        return LEFTSHIFT;
">>"        return RIGHTSHIFT;
"++"        return INC;
"--"        return DEC;
"+"         return PLUS;
"-"         return MINUS;
"~"         return UNARYMINUS;
"/"         return DIV;
"*"         return MUL;
"^"         return POW;
"!"         return FACT;
"("         return OPENBRACKET;
")"         return CLOSEBRACKET;
"%"         return MOD;
"^^"        return XOR;
"!!"        return NOT;
"="         return ASSIGN;
"&&"        return LAND;
"||"        return OR;
"|"         return IOR;
"&"         return AND;
"~~"        return COMPLEMENT;
"\n"        return EOLN;
%%

/* Called by the scanner at end of input; 1 means there is nothing more to read. */
int yywrap(void)
{
    return 1;
}

CFG for MINI Language and LR(1) parser

Write a CFG for the MINI language specifications.


The predictive parsing method, whether based on a set of recursive functions or on its non-recursive
variant using a table, rests on the premise that whenever a given terminal is seen the algorithm can
choose the correct production. This means that for each terminal symbol there must be a unique
production that allows the algorithm to “see” or “guess” the production correctly. Furthermore,
whenever a terminal is seen the corresponding production is expanded, which corresponds to the parse
tree being expanded. On a left-recursive grammar there is a possibility that the algorithm will try to
expand a production without consuming any of its input. In this case, guessing right does not help:
because of the recursion, the tree is continuously expanded without any advance in the input
symbols. Thus the parsing process never terminates.
Consider the following CFG G = (N={S, A, B, C, D}, T={a,b,c,d}, P, S) where the set of
productions P is given below:
S→A
A → BC | DBC
B → Bb | ε
C→c|ε
D→a|d
Transform your CFG into:
Predictive parser (LL(1)). - Compute FIRST, FOLLOW sets for the grammar
and create the Parsing table (manually).
This grammar is left-recursive: B can be expanded using a production with B as the left-most
symbol without consuming any of the input terminal symbols. To eliminate this
left recursion we add another non-terminal symbol, B’, and productions as follows:
S →A
A → BC | DBC
B → bBʼ | ε
Bʼ → bBʼ | ε
C →c|ε
D →a|d
FIRST(S) = { a, b, c, d, ε } FOLLOW(S) = { $ }
FIRST(A) = { a, b, c, d, ε } FOLLOW(A) = { $ }
FIRST(B) = { b, ε } FOLLOW(B) = { c, $ }
FIRST(B’) = { b, ε } FOLLOW(B’) = { c, $ }
FIRST(C) = { c, ε } FOLLOW(C) = { $ }
FIRST(D) = { a, d } FOLLOW(D) = { b, c, $ }
Non-terminals A, B, B’, C and S are all nullable.
Parsing table
Bottom-up parser (LR(1)). - Construct either an SLR (LR(0)), canonical LR(1), or LALR
parsing table (manually).
We use the grammar below, already augmented with the basic EOF production
(0):
(0) S → Stmts $
(1) Stmts → Stmt
(2) Stmts → Stmts ; Stmt
(3) Stmt → Var = E
(4) Var → id [ E ]
(5) Var → id
(6) E → id
(7) E → ( E )
Construct LR(0)
Construct the LR(0) parsing table
Based on the DFA above we derive the LR parsing table below, where we note a shift/reduce
conflict in state 3. In this state the presence of a ‘[’ indicates that the parser can either reduce
using production 5 or shift by advancing to state s6. Note that by reducing it would then be
left in a state, possibly s0, where the presence of the ‘[’ would lead to an error. Clearly, this
grammar is not suitable for the LR(0) parsing method.
Construct the set of LR(1 )

As can be seen, the number of states in this new DFA is much larger when
compared to the DFA that recognizes the LR(0) sets of items.
Construct the LR(1) parsing table

Construct the LALR(1) parsing table for this grammar


There are many states with identical core items that differ only in the look-ahead and can
thus be merged, as suggested by the procedure to construct LALR parsing tables. For instance,
states {s10, s15, s16} could be merged into a single state named s40. The same is true for the
states in the sets s41={s17, s18, s19}, s42={s21, s22, s23} and s43={s11, s13, s14}, thus
substantially reducing the number of states, as will be seen in the next point in this exercise.
The table below reflects these merge operations resulting in a much smaller table which is LALR
as there are no conflicts due to the merging of states with the same core items.
the LALR(1) parsing table
Write a parsing program in yacc to parse tokens from the MINI language. Write a LEX
program to recognize valid C and C++ programs.
void s() {

if (token == ’(’) ,

advance();

s1();

- else if (token == ’a’)

advance();

else error();

/*
 * Recursive-descent procedure for the non-terminal A: calls s(), then a1().
 * NOTE(review): the listing breaks off after a1(); confirm that nothing else
 * belonged to this function.
 */
void a() {
    s();
    a1();
}

You might also like