
Experiment No. 01

Aim: Implementation of lexical analyzer.
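
The program below implements a simple lexical analyzer for C source code in Python. It scans the input one character at a time, splitting tokens at whitespace, operator, and punctuation characters, and classifies each token as a keyword, operator, punctuation symbol, numeric literal, string literal, or identifier using lookup tables and regular expressions.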


Code:
import re

# Lookup tables for C keywords, operators, punctuation and whitespace
keywords = ['auto', 'break', 'case', 'char', 'const', 'continue', 'default',
            'do', 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto',
            'if', 'int', 'long', 'register', 'return', 'short', 'signed',
            'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
            'unsigned', 'void', 'volatile', 'while']
operators = ['+', '-', '*', '/', '=', '<', '>', '==', '!=', '<=', '>=', '&&',
             '||', '++', '--', '+=', '-=', '*=', '/=']
punctuation = ['(', ')', '{', '}', '[', ']', ';', ',', '.']
whitespaces = [' ', '\t', '\n']

# Regular expressions for numeric literals, string literals and identifiers
literals = re.compile(r'\b\d+(\.\d+)?\b')
string_literals = re.compile(r'"([^"\\]*(?:\\.[^"\\]*)*)"')
identifier = re.compile(r'^[a-zA-Z_]\w*$')

def find_tokens(code):
    tokens = []
    lines = code.split('\n')
    for line_no, line in enumerate(lines, start=1):
        token = ''
        for char in line:
            if char in whitespaces:
                # Whitespace terminates the current token
                if token:
                    tokens.append((token, get_token_type(token), line_no))
                    token = ''
            elif char in punctuation or char in operators:
                # An operator or punctuation character terminates the
                # current token and is emitted as a token of its own.
                # Note: the scan is one character at a time, so a
                # multi-character operator such as '==' is split into
                # two '=' tokens, and a float such as 1.5 is split at '.'.
                if token:
                    tokens.append((token, get_token_type(token), line_no))
                    token = ''
                tokens.append((char, get_token_type(char), line_no))
            else:
                token += char
        if token:
            # Flush whatever token is left at the end of the line
            tokens.append((token, get_token_type(token), line_no))
    return tokens

def get_token_type(token):
    if token in keywords:
        return 'Keyword'
    elif token in operators:
        return 'Operator'
    elif token in punctuation:
        return 'Punctuation'
    elif literals.match(token):
        return 'Literal'
    elif string_literals.match(token):
        return 'String Literal'
    elif identifier.match(token):
        return 'Identifier'
    else:
        return 'Unknown'

with open('demo_file.c', 'r') as file:
    code = file.read()

tokens = find_tokens(code)
for token, token_type, line_no in tokens:
    print(f'Token: {token}\nToken Type: {token_type}\nLine Number: {line_no}\n')
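
Because of the single-character scan, the program above cannot report two-character operators as one token. A minimal lookahead variant is sketched below; it is not part of the original program, and it reuses the lookup tables and get_token_type defined above, greedily trying a two-character operator before falling back to a single character:

def find_tokens_with_lookahead(code):
    tokens = []
    for line_no, line in enumerate(code.split('\n'), start=1):
        token = ''
        i = 0
        while i < len(line):
            two = line[i:i+2]
            # Try a two-character operator first (e.g. '==', '<=', '&&')
            if two in operators:
                if token:
                    tokens.append((token, get_token_type(token), line_no))
                    token = ''
                tokens.append((two, get_token_type(two), line_no))
                i += 2
                continue
            char = line[i]
            if char in whitespaces:
                if token:
                    tokens.append((token, get_token_type(token), line_no))
                    token = ''
            elif char in punctuation or char in operators:
                if token:
                    tokens.append((token, get_token_type(token), line_no))
                    token = ''
                tokens.append((char, get_token_type(char), line_no))
            else:
                token += char
            i += 1
        if token:
            tokens.append((token, get_token_type(token), line_no))
    return tokens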
demo_file.c:
void main () {
    int a = 10;
    int b = 20;
    int c = a + b;
    printf("%d", c);
}

Output:
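
Running the program on demo_file.c prints one entry per token; the first few entries are:

Token: void
Token Type: Keyword
Line Number: 1

Token: main
Token Type: Identifier
Line Number: 1

Token: (
Token Type: Punctuation
Line Number: 1

... and so on for the remaining tokens of demo_file.c.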
