You are on page 1 of 11

Implementation of a Lexical Analyzer (any one of the above programs) using the Lex Tool

Compiler Design lab

Experiment - 2

Course code: BCSE307P

Name: Abhinav V

Reg.No: 21BAI1213
Implementation of Lexical Analyzer Using Lex Tool

Program:
%{
/* C prologue copied into the generated scanner; <stdio.h> supplies the
 * printf() used by the rule actions. */
#include <stdio.h>
#include <stdlib.h>
%}
%option noyywrap
%{
/*
 * Token codes.
 * NOTE(review): the rule actions in this file only print each token; none of
 * them returns one of these codes, so the macros appear to be unused here --
 * confirm before removing.
 * NOTE(review): numbering starts at 257, presumably to stay clear of
 * single-character codes -- confirm.
 */
#define IDENTIFIER 257
#define KEYWORD 258
#define INTEGER 259
#define PLUS 260
#define MINUS 261
#define MULTIPLY 262
#define DIVIDE 263
%}
/*
 * Regular expressions and actions.
 *
 * Rule order matters: when two patterns match the same number of characters,
 * the scanner uses the one listed first. The keyword rules therefore have to
 * precede the general identifier rule; otherwise "if", "else", "while" and
 * "int" are always reported as identifiers. Longer words such as "iffy" still
 * become identifiers because the longest match wins. For the same reason the
 * four operator rules precede the catch-all punctuation rule.
 */
%%
[ \t\n]+                { /* Ignore whitespace */ }
"if"                    { printf("KEYWORD: if\n"); }
"else"                  { printf("KEYWORD: else\n"); }
"while"                 { printf("KEYWORD: while\n"); }
"int"                   { printf("KEYWORD: int\n"); }
[0-9]+                  { printf("INTEGER: %s\n", yytext); }
[a-zA-Z][a-zA-Z0-9]*    { printf("IDENTIFIER: %s\n", yytext); }
"+"                     { printf("PLUS\n"); }
"-"                     { printf("MINUS\n"); }
"*"                     { printf("MULTIPLY\n"); }
"/"                     { printf("DIVIDE\n"); }
[[:punct:]]             { /* Ignore punctuation */ }
.                       { printf("Invalid token: %s\n", yytext); }
%%
/* Drive the scanner: keep calling yylex() until it reports 0, then exit. */
int main() {
    for (;;) {
        if (yylex() == 0) {
            break;
        }
    }
    return 0;
}
Output :
Construction of Syntax Analyzer
Program:
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

// Kinds of token the scanner can hand to the parser.
typedef enum {
    INTEGER = 0, // unsigned decimal literal; its value travels in Token.value
    PLUS    = 1, // '+'
    MINUS   = 2, // '-'
    END     = 3  // end of input
} TokenType;

// A single lexical token: its kind plus, for integer literals, the numeric
// value. getNextToken() stores 0 in `value` for every non-INTEGER token.


typedef struct {
TokenType type; // which kind of token this is
int value; // Only used for INTEGER type
} Token;

// Function prototypes
Token getNextToken(); // scan the next token from stdin; END at end of input
void error(const char *msg); // print "Error: <msg>" to stderr and exit(1)
void eat(TokenType expected); // consume currentToken if it has the expected type, else error()
int expr(); // evaluate factors joined by binary '+' / '-'
int term(); // currently just forwards to factor()
int factor(); // evaluate an INTEGER, optionally preceded by unary '+' / '-'

// Global variable to hold the current token: the parser's one-token
// lookahead. main() loads the first token and eat() replaces it.


Token currentToken;

// Entry point: read one expression from stdin, evaluate it and print the
// value. Any syntax error terminates the program inside error().
int main() {
    currentToken = getNextToken();          // prime the lookahead

    const int value = expr();               // parse + evaluate

    eat(END);                               // nothing may follow the expression

    printf("Result: %d\n", value);
    return 0;
}
// Get the next token from input
Token getNextToken() {
int c;
while ((c = getchar()) != EOF) {
if (isdigit(c)) {
ungetc(c, stdin);
int value;
scanf("%d", &value);
return (Token){INTEGER, value};
} else if (isspace(c)) {
continue; // Ignore whitespace
} else if (c == '+') {
return (Token){PLUS, 0};
} else if (c == '-') {
return (Token){MINUS, 0};
} else {
error("Invalid character");
}
}
return (Token){END, 0}; // Return END token at the end of input
}

// Report an error and exit.
// Writes "Error: <msg>" followed by a newline to stderr, then terminates the
// process with exit status 1; this function does not return to its caller.


void error(const char *msg) {
fprintf(stderr, "Error: %s\n", msg);
exit(1);
}

// Consume the lookahead token.
// If currentToken is not of the `expected` kind the parse is aborted through
// error(); otherwise the next token is read into currentToken.
void eat(TokenType expected) {
    if (currentToken.type != expected) {
        error("Unexpected token");
    } else {
        currentToken = getNextToken();
    }
}

// Parse and evaluate an expression: term { ('+' | '-') term }.
// Returns the running total, folding operands in left to right.
int expr() {
    int total = term();

    for (;;) {
        if (currentToken.type == PLUS) {
            eat(PLUS);
            total += term();
        } else if (currentToken.type == MINUS) {
            eat(MINUS);
            total -= term();
        } else {
            break; // lookahead is not an additive operator
        }
    }

    return total;
}

// Parse and evaluate a term. At present a term is exactly one factor, so the
// value is passed straight through.
int term() {
    return factor();
}

// Parse and evaluate a factor: INTEGER | '+' factor | '-' factor.
// Returns the factor's value. Any other lookahead token is a syntax error and
// is reported through error().
int factor() {
    if (currentToken.type == INTEGER) {
        int value = currentToken.value; // read before eat() replaces the token
        eat(INTEGER);
        return value;
    }
    if (currentToken.type == PLUS) {
        eat(PLUS);
        return factor();
    }
    if (currentToken.type == MINUS) {
        eat(MINUS);
        return -factor();
    }

    error("Invalid factor");
    // error() exits the process; the explicit return keeps every path of this
    // non-void function well-defined and silences missing-return warnings.
    return 0;
}

Output:
Implement a C Program for an LL(1) Top-Down Parser for
Any Given LL(1) Grammar
Program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/*
 * One grammar production, stored as three short strings (each at most nine
 * characters plus the terminator).
 * NOTE(review): main() initialises these as {left-hand side, right-hand side,
 * ""}; the purpose of the third slot is not visible here -- confirm.
 */
typedef struct {
char production[3][10];
} CfgRule;

/* A growable list of productions; add_rule() appends to it. */
typedef struct {
CfgRule *rules; /* heap array of `count` rules, NULL while empty */
int count;      /* number of rules currently stored */
} Cfg;

Cfg new_cfg() {
Cfg cfg = {NULL, 0};
return cfg;
}

/*
 * Append a copy of `rule` to the grammar, growing the rule array by one slot.
 *
 * The result of realloc() is checked before it replaces cfg->rules, so a
 * failed allocation neither loses the existing array nor leads to a NULL
 * dereference. Because the function has no way to report failure to its
 * caller, an out-of-memory condition prints a diagnostic and exits.
 */
void add_rule(Cfg *cfg, CfgRule rule) {
    CfgRule *grown = realloc(cfg->rules, sizeof *grown * ((size_t)cfg->count + 1));
    if (grown == NULL) {
        perror("add_rule: realloc");
        exit(EXIT_FAILURE);
    }
    cfg->rules = grown;
    cfg->rules[cfg->count++] = rule;
}

/* Token kinds produced by tokenize(). */
typedef enum {
    T_EOF    = 0, /* end-of-input marker appended after the last real token */
    T_ERROR  = 1, /* a character the tokenizer does not recognise */
    T_INT    = 2, /* run of decimal digits */
    T_LPAREN = 3, /* '(' */
    T_RPAREN = 4, /* ')' */
    T_PLUS   = 5, /* '+' */
    T_TIMES  = 6  /* '*' */
} TokenType;

/*
 * A token: its kind and its text. tokenize() passes string literals as the
 * text of operators, parentheses, T_ERROR and T_EOF, and a heap-allocated
 * copy of the digits for T_INT.
 */
typedef struct {
TokenType type; /* kind of token */
char *value;    /* token text; see ownership note above */
} Token;

/*
 * Bundle a token kind and its text into a Token. The `value` pointer is
 * stored as given; no copy of the text is made.
 */
Token new_token(TokenType type, char *value) {
    Token made;
    made.type = type;
    made.value = value;
    return made;
}
/* A growable array of tokens; add_token() appends to it. */
typedef struct {
Token *tokens; /* heap array of tokens, NULL while empty */
int count;     /* number of entries, as maintained by add_token() */
} TokenStream;

TokenStream new_token_stream(char *input) {


TokenStream stream = {NULL, 0};
return stream;
}

/*
 * Append `token` to the stream, growing the token array by one slot.
 *
 * The result of realloc() is checked before it replaces stream->tokens, so a
 * failed allocation neither loses the existing array nor leads to a NULL
 * dereference. Because the function has no way to report failure to its
 * caller, an out-of-memory condition prints a diagnostic and exits.
 */
void add_token(TokenStream *stream, Token token) {
    Token *grown = realloc(stream->tokens, sizeof *grown * ((size_t)stream->count + 1));
    if (grown == NULL) {
        perror("add_token: realloc");
        exit(EXIT_FAILURE);
    }
    stream->tokens = grown;
    stream->tokens[stream->count++] = token;
}

/*
 * Split `input` into tokens.
 *
 * Recognises '+', '*', '(', ')' and runs of decimal digits (T_INT); blanks,
 * tabs and newlines are skipped; every other character yields one T_ERROR
 * token. A T_EOF token is always appended last.
 *
 * Ownership: the text of a T_INT token is a heap-allocated copy of its
 * digits; all other tokens point at string literals. An allocation failure
 * prints a diagnostic and exits.
 *
 * Each branch advances the cursor itself. In particular, after an integer the
 * cursor is left on the first character following the digits, so the
 * character right after a number (e.g. the '+' in "1+2") is not skipped.
 */
TokenStream tokenize(char *input) {
    TokenStream stream = new_token_stream(input);
    size_t i = 0;

    while (input[i] != '\0') {
        /* <ctype.h> functions require a value in unsigned char range. */
        const unsigned char ch = (unsigned char)input[i];

        switch (ch) {
        case '+':
            add_token(&stream, new_token(T_PLUS, "+"));
            i++;
            break;
        case '*':
            add_token(&stream, new_token(T_TIMES, "*"));
            i++;
            break;
        case '(':
            add_token(&stream, new_token(T_LPAREN, "("));
            i++;
            break;
        case ')':
            add_token(&stream, new_token(T_RPAREN, ")"));
            i++;
            break;
        case ' ':
        case '\t':
        case '\n':
            i++;
            break;
        default:
            if (isdigit(ch)) {
                size_t end = i;
                while (isdigit((unsigned char)input[end])) {
                    end++;
                }

                /* Copy the digits with malloc/memcpy (plain ISO C). */
                const size_t len = end - i;
                char *text = malloc(len + 1);
                if (text == NULL) {
                    perror("tokenize: malloc");
                    exit(EXIT_FAILURE);
                }
                memcpy(text, input + i, len);
                text[len] = '\0';

                add_token(&stream, new_token(T_INT, text));
                i = end; /* already on the first non-digit: no extra step */
            } else {
                add_token(&stream, new_token(T_ERROR, ""));
                i++;
            }
            break;
        }
    }

    add_token(&stream, new_token(T_EOF, ""));
    return stream;
}

/* Parser state for one parse over a token stream. */
typedef struct {
Cfg *grammar;       /* grammar handed to new_parser(); stored as given */
TokenStream *input; /* tokens being parsed; not owned by the parser */
int input_index;    /* position within input->tokens; new_parser() starts it at 0 */
Token lookahead;    /* token currently under examination; seeded with input->tokens[0] */
} Parser;

/*
 * Build a parser over `input` for `grammar`. The cursor starts at index 0 and
 * the lookahead is the stream's first token, so `input` must hold at least
 * one token.
 */
Parser new_parser(Cfg *grammar, TokenStream *input) {
    Parser p;
    p.grammar = grammar;
    p.input = input;
    p.input_index = 0;
    p.lookahead = input->tokens[0];
    return p;
}

/*
 * Return the final token of `input` without modifying the stream.
 *
 * A TokenStream carries no read position, so this function cannot step
 * through the tokens; match() advances through the parser's own
 * `input_index` instead. The function is kept so existing callers still
 * link. For a stream built by tokenize() the final token is the T_EOF
 * marker; an empty stream also yields a T_EOF token.
 *
 * (It used to read tokens[count], one element past the end of the array,
 * and then increment count, corrupting the stream's length.)
 */
Token next_token(TokenStream *input) {
    if (input->count <= 0) {
        return (Token){T_EOF, ""};
    }
    return input->tokens[input->count - 1];
}

/*
 * Move the parser to the following token, if there is one. At the last token
 * of the stream the lookahead is left unchanged, so the parser never reads
 * beyond the array.
 */
static void advance(Parser *parser) {
    const TokenStream *stream = parser->input;

    if (parser->input_index + 1 < stream->count) {
        parser->input_index++;
        parser->lookahead = stream->tokens[parser->input_index];
    }
}

/*
 * If the lookahead token has type `expected`, consume it and return 1;
 * otherwise leave the parser untouched and return 0.
 */
int match(Parser *parser, TokenType expected) {
    if (parser->lookahead.type != expected) {
        return 0;
    }
    advance(parser);
    return 1;
}

/* Forward declaration: a parenthesised factor recurses into parse_expr(). */
int parse_expr(Parser *parser);

/*
 * factor -> T_INT | '(' expr ')'
 * Returns 1 when a factor was recognised, 0 otherwise.
 */
int parse_factor(Parser *parser) {
    if (match(parser, T_INT)) {
        return 1;
    }
    if (!match(parser, T_LPAREN)) {
        return 0;
    }
    /* '(' consumed: an expression and a closing ')' must follow. */
    if (!parse_expr(parser)) {
        return 0;
    }
    return match(parser, T_RPAREN) ? 1 : 0;
}

/*
 * term' -> '*' factor term' | (empty)
 * Returns 1 on success (including the empty case), 0 when a '*' is not
 * followed by a valid factor and tail.
 */
int parse_term_prime(Parser *parser) {
    if (!match(parser, T_TIMES)) {
        return 1; /* empty alternative */
    }
    if (!parse_factor(parser)) {
        return 0;
    }
    return parse_term_prime(parser) ? 1 : 0;
}

/*
 * term -> factor term'
 * Returns 1 when both parts are recognised, 0 otherwise.
 */
int parse_term(Parser *parser) {
    if (!parse_factor(parser)) {
        return 0;
    }
    return parse_term_prime(parser) ? 1 : 0;
}

/*
 * expr' -> '+' term expr' | (empty)
 * Returns 1 on success (including the empty case), 0 when a '+' is not
 * followed by a valid term and tail.
 */
int parse_expr_prime(Parser *parser) {
    if (!match(parser, T_PLUS)) {
        return 1; /* empty alternative */
    }
    if (!parse_term(parser)) {
        return 0;
    }
    return parse_expr_prime(parser) ? 1 : 0;
}

/*
 * expr -> term expr'
 * Returns 1 when both parts are recognised, 0 otherwise.
 */
int parse_expr(Parser *parser) {
    if (!parse_term(parser)) {
        return 0;
    }
    return parse_expr_prime(parser) ? 1 : 0;
}

/*
 * Parse a complete input: one expression followed by T_EOF.
 * Returns 1 if the whole token stream is accepted, 0 otherwise.
 */
int parse(Parser *parser) {
    if (!parse_expr(parser)) {
        return 0;
    }
    return match(parser, T_EOF) ? 1 : 0;
}

int main() {
Cfg grammar = new_cfg();
CfgRule rule1 = {"E", "+T", ""};
CfgRule rule2 = {"E", "T", ""};
CfgRule rule3 = {"T", "*F", ""};
CfgRule rule4 = {"T", "F", ""};
CfgRule rule5 = {"F", "(E)", ""};
CfgRule rule6 = {"F", "i", ""}; // i stands for an integer
add_rule(&grammar, rule1);
add_rule(&grammar, rule2);
add_rule(&grammar, rule3);
add_rule(&grammar, rule4);
add_rule(&grammar, rule5);
add_rule(&grammar, rule6);

printf("Enter an expression: ");


char input[100];
scanf("%99[^\n]", input); // Read up to 99 characters until newline

TokenStream input_tokens = tokenize(input);


Parser parser = new_parser(&grammar, &input_tokens);
if (parse(&parser))
printf("Parsing successful!\n");
else
printf("Parsing failed!\n");

return 0;
}

Output:

This input has unbalanced parentheses and a trailing '*' operator
without an operand, which violates the grammar rules.
So, the LL(1) parser rejects the input.

You might also like