You are on page 1 of 43

CSE-2012

LAB LPS-9
Name :- Mugdha
Registration Number :- 20BPS1095
1. Implement using C++ the Rabin-Karp algorithm.
CODE –

#include <iostream>
#include <string.h>
using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Number of matches found by search(); accumulated across calls and
// printed by main().
int count = 0;

/*
 * Rabin-Karp search: adds to the global `count` the number of positions
 * (overlapping ones included) at which pat occurs in txt.
 *
 * pat -> pattern
 * txt -> text
 * q   -> A prime number (the hash modulus)
 *
 * Nothing is counted when q is not positive, when pat is empty, or when
 * pat is longer than txt.
 */
void search(char pat[], char txt[], int q)
{
    const int M = static_cast<int>(strlen(pat));
    const int N = static_cast<int>(strlen(txt));

    // Without this guard the first-window hash below would read past the
    // end of txt, and q <= 0 would make the modulo undefined.
    if (q <= 0 || M == 0 || M > N)
        return;

    // 64-bit intermediates so the rolling update cannot overflow for any int q.
    long long p = 0; // hash value for pattern
    long long t = 0; // hash value for the current window of txt
    long long h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (int i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text.
    // Bytes are hashed as unsigned char so that characters >= 0x80 cannot
    // produce a negative pattern hash that never equals the rolled one.
    for (int i = 0; i < M; i++) {
        p = (d * p + static_cast<unsigned char>(pat[i])) % q;
        t = (d * t + static_cast<unsigned char>(txt[i])) % q;
    }

    // Slide the pattern over text one by one
    for (int i = 0; i <= N - M; i++) {
        // Compare characters only when the hash values match.
        if (p == t && memcmp(txt + i, pat, M) == 0) {
            // Pattern found at index i. The name is qualified so it
            // cannot be confused with std::count.
            ::count++;
        }

        // Calculate hash value for next window of text: remove leading
        // digit, add trailing digit
        if (i < N - M) {
            t = (d * (t - static_cast<unsigned char>(txt[i]) * h)
                 + static_cast<unsigned char>(txt[i + M])) % q;

            // We might get negative value of t, converting it to positive
            if (t < 0)
                t += q;
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] =
"GEEK"; char
txt[100], pat[100];
scanf("%s", txt);
scanf("%s", pat);

// A prime
number int q =
101;

// function call
search(pat, txt,
q);
cout<<count;
return 0;
}

Output –

2. For various values of n (length of S1) and m (length of S2)


compute the running times of the brute-force program in IPS1 and
the Rabin-Karp program in LPS1. Record the data in a table as
below. Here T1(P) and T2(P) are the running times of the brute
force program and the Rabin-Karp program respectively.

CODE:

#include <iostream>
#include <string>
#include <string.h>
#include <cstdio>
#include <cstdlib>
#include <sys/time.h>
using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Number of matches found by rabin_karp_algo(), accumulated across calls.
int count = 0;

/*
 * Brute-force pattern search used for timing: counts the positions
 * (overlapping ones included) at which s2 occurs in s1.
 *
 * s1 -> text
 * s2 -> pattern
 *
 * The count is only written to a volatile sink so the compiler cannot
 * discard the timed work; nothing is returned. An empty pattern, or a
 * pattern longer than the text, performs no comparisons.
 */
void bfp(string s1, string s2){
    const size_t n = s1.length();
    const size_t m = s2.length();
    int c = 0;

    if (m != 0 && m <= n) {
        // Try every start position; restarting from start + 1 after a
        // partial match is what keeps occurrences from being skipped.
        for (size_t start = 0; start + m <= n; start++) {
            size_t j = 0;
            while (j < m && s1[start + j] == s2[j])
                j++;
            if (j == m)
                c++;
        }
    }

    volatile int sink = c;
    (void)sink;
}

void rabin_karp_algo(string pat, string txt, int


q){ int M = pat.length();
int N =
txt.length(); int i, j; int p = 0;
// hash value for pattern
int t = 0; // hash value for
txt int h = 1;

// The value of h would be "pow(d,


M-1)%q" for (i = 0; i < M-1; i++)
h = (h*d)%q;

// Calculate the hash value of pattern and first


// window of text
for (i = 0; i < M;
i++)
{ p = (d*p +
pat[i])%q; t =
(d*t
+ txt[i])%q;
}
// Slide the pattern over text one by
one for (i = 0; i <= N - M; i++)
{

// Check the hash values of current window of text


// and pattern. If the hash values match then only
// check for characters one by
one if ( p == t )
{
/* Check for characters one by
one */ for (j = 0; j < M; j++)
{ if (txt[i+j] != pat[j])
break;
}
i
// if p == t and pat[0...M-1] = txt[i, i+1, ...i+M-
1]
if (j == M){
//printf("Pattern found at index %d
\n", i); count++;
}

// Calculate hash value for next window of text:


Remove
// leading digit, add trailing digit
if ( i < N-M )
{ t = (d*(t - txt[i]*h) + txt[i+M])%q;

// We might get negative value of t, converting it


// to
positive if
(t < 0) t =
(t + q);
}
}
}

int main(){ string s1 = "a", s2


= "aaaa";

int n = 4, m = 0, an[10], am[10];


float t1[10], t2[10];

for(int i = 0; i < 10
;i++){ n = n + 2*i;
m=m+
i; an[i] = n; am[i] = m; s1 =
"aaaaaaaaaaaaaaaaaaaaaaaaaaa", s2 =
"aaaaaaaa"; struct timeval start, end;
// start timer.
gettimeofday(&start,
NULL);
// unsync the I/O of C++.
ios_base::sync_with_stdio(false);
// function
call
bfp(s1,s2);
// stop
timer.
gettimeofday(&end,
NULL);
// Calculating total time taken by the
program. double time_taken;
time_taken = (end.tv_sec - start.tv_sec) * 1e6;
time_taken = (time_taken + (end.tv_usec -
start.tv_usec))
* 1e-6; t1[i] = time_taken;
struct timeval start1, end1;
// start timer.
gettimeofday(&start1,
NULL);
// unsync the I/O of C++.
ios_base::sync_with_stdio(false);
// A prime
number int q =
101;
// function call
rabin_karp_algo(s2,s1,q)
; // stop timer.
gettimeofday(&end1,
NULL);
// Calculating total time taken by the program.
double time_taken1;
time_taken1 = (end1.tv_sec - start1.tv_sec) * 1e6;
time_taken1 = (time_taken1 + (end1.tv_usec -
start1.tv_usec))
* 1e-6; t2[i] = time_taken1;
}

cout<<"S.No."<<"\t"<<"n"<<"\t"<<"m"<<"\t"<<"T1(p)"<<"\t\t
"<<"T2(p) in seconds"<<endl;
for(int i = 0 ; i < 10 ;i++){
printf("%d\t%d\t%d\t%f\t%f\n",(i+1),an[i], am[i], t1[i],
t2[i]);
}}

Output –
3. Run your program for IPS1 with the strings given in the link
https://www-igm.univ-mlv.fr/~lecroq/string/examples/exp5.html

CODE:

#include <iostream>
#include <string>
using namespace std;

/*
 * Brute-force pattern search: reads the text s1 and then the pattern s2
 * from standard input and prints the number of positions (overlapping
 * ones included) at which s2 occurs in s1. An empty pattern, or one
 * longer than the text, gives 0.
 */
int main(){
    string s1, s2;
    cin >> s1;
    cin >> s2;

    const size_t n = s1.length();
    const size_t m = s2.length();
    int c = 0;

    if (m != 0 && m <= n) {
        // Try every start position; restarting from start + 1 after a
        // partial match is what keeps occurrences from being skipped.
        for (size_t start = 0; start + m <= n; start++) {
            size_t j = 0;
            while (j < m && s1[start + j] == s2[j])
                j++;
            if (j == m)
                c++;
        }
    }

    cout << c;
    return 0;
}

Output –

4. Run your program for LPS1 with the strings given in the link
https://www-igm.univ-mlv.fr/~lecroq/string/examples/exp5.html

CODE:

#include <iostream>
#include <string.h>

using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Number of matches found by search(); accumulated across calls and
// printed by main().
int count = 0;

/*
 * Rabin-Karp search: adds to the global `count` the number of positions
 * (overlapping ones included) at which pat occurs in txt.
 *
 * pat -> pattern
 * txt -> text
 * q   -> A prime number (the hash modulus)
 *
 * Nothing is counted when q is not positive, when pat is empty, or when
 * pat is longer than txt.
 */
void search(char pat[], char txt[], int q)
{
    const int M = static_cast<int>(strlen(pat));
    const int N = static_cast<int>(strlen(txt));

    // Without this guard the first-window hash below would read past the
    // end of txt, and q <= 0 would make the modulo undefined.
    if (q <= 0 || M == 0 || M > N)
        return;

    // 64-bit intermediates so the rolling update cannot overflow for any int q.
    long long p = 0; // hash value for pattern
    long long t = 0; // hash value for the current window of txt
    long long h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (int i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text.
    // Bytes are hashed as unsigned char so that characters >= 0x80 cannot
    // produce a negative pattern hash that never equals the rolled one.
    for (int i = 0; i < M; i++) {
        p = (d * p + static_cast<unsigned char>(pat[i])) % q;
        t = (d * t + static_cast<unsigned char>(txt[i])) % q;
    }

    // Slide the pattern over text one by one
    for (int i = 0; i <= N - M; i++) {
        // Compare characters only when the hash values match.
        if (p == t && memcmp(txt + i, pat, M) == 0) {
            // Pattern found at index i. The name is qualified so it
            // cannot be confused with std::count.
            ::count++;
        }

        // Calculate hash value for next window of text: remove leading
        // digit, add trailing digit
        if (i < N - M) {
            t = (d * (t - static_cast<unsigned char>(txt[i]) * h)
                 + static_cast<unsigned char>(txt[i + M])) % q;

            // We might get negative value of t, converting it to positive
            if (t < 0)
                t += q;
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] = "GEEK";
char txt[100],
pat[100];
scanf("%s", txt);
scanf("%s", pat);

// A prime
number int q =
101;

// function call
search(pat, txt,
q); cout<<count;
return 0;
}
Output –

5. Modify your program for IPS1 to count only the total number of
character comparisons and print the same.

CODE:

#include <iostream>
using namespace std;

/*
 * Brute-force pattern search with comparison counting: reads the text s1
 * and then the pattern s2 from standard input, then prints the number of
 * positions (overlapping ones included) at which s2 occurs in s1 and the
 * total number of character comparisons made.
 *
 * Every comparison is counted, whether the two characters were equal or
 * not. An empty pattern, or one longer than the text, makes none.
 */
int main(){
    string s1, s2;
    cin >> s1;
    cin >> s2;

    const size_t n = s1.length();
    const size_t m = s2.length();
    int c = 0;
    int charCmp = 0;

    if (m != 0 && m <= n) {
        // Try every start position; restarting from start + 1 after a
        // partial match is what keeps occurrences from being skipped.
        for (size_t start = 0; start + m <= n; start++) {
            size_t j = 0;
            while (j < m) {
                charCmp++;
                if (s1[start + j] != s2[j])
                    break;
                j++;
            }
            if (j == m)
                c++;
        }
    }

    cout<<"Total Occurences: "<<c<<endl;
    cout<<"Character Comparison: "<<charCmp;
    return 0;
}

OUTPUT:

6. Modify your program for LPS1 to count only the total number of
character comparisons and print the same.
CODE:

#include <iostream>
#include <string.h>

using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// count   -> number of matches found by search()
// charCmp -> number of character comparisons made by search()
// Both accumulate across calls and are printed by main().
int count = 0, charCmp = 0;

/*
 * Rabin-Karp search with comparison counting. Adds to `count` the number
 * of positions (overlapping ones included) at which pat occurs in txt, and
 * to `charCmp` the number of character comparisons made while verifying
 * windows whose hash equals the pattern hash. Every comparison is counted,
 * whether the two characters were equal or not.
 *
 * pat -> pattern
 * txt -> text
 * q   -> A prime number (the hash modulus)
 *
 * Nothing is counted when q is not positive, when pat is empty, or when
 * pat is longer than txt.
 */
void search(char pat[], char txt[], int q)
{
    const int M = static_cast<int>(strlen(pat));
    const int N = static_cast<int>(strlen(txt));

    // Without this guard the first-window hash below would read past the
    // end of txt, and q <= 0 would make the modulo undefined.
    if (q <= 0 || M == 0 || M > N)
        return;

    // 64-bit intermediates so the rolling update cannot overflow for any int q.
    long long p = 0; // hash value for pattern
    long long t = 0; // hash value for the current window of txt
    long long h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (int i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text.
    // Bytes are hashed as unsigned char so that characters >= 0x80 cannot
    // produce a negative pattern hash that never equals the rolled one.
    for (int i = 0; i < M; i++) {
        p = (d * p + static_cast<unsigned char>(pat[i])) % q;
        t = (d * t + static_cast<unsigned char>(txt[i])) % q;
    }

    // Slide the pattern over text one by one
    for (int i = 0; i <= N - M; i++) {
        // Check for characters one by one only when the hash values match.
        if (p == t) {
            int j = 0;
            while (j < M) {
                ::charCmp++;
                if (txt[i + j] != pat[j])
                    break;
                j++;
            }

            // Pattern found at index i. The name is qualified so it
            // cannot be confused with std::count.
            if (j == M)
                ::count++;
        }

        // Calculate hash value for next window of text: remove leading
        // digit, add trailing digit
        if (i < N - M) {
            t = (d * (t - static_cast<unsigned char>(txt[i]) * h)
                 + static_cast<unsigned char>(txt[i + M])) % q;

            // We might get negative value of t, converting it to positive
            if (t < 0)
                t += q;
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] = "GEEK";
char txt[100],
pat[100]; scanf("%s",
txt);
scanf("%s", pat);

// A prime
number int q =
101;

// function call search(pat, txt,


q); cout<<"Total Occurences:
"<<count<<endl; cout<<"Character
Comparison: "<<charCmp; return 0;
}
OUTPUT:

7. For various values of n (length of S1) and m (length of S2) count


the total number of character comparisons of the brute-force
program in IPS1 and the Rabin-Karp program in LPS1 (Use your
programs for LPS5 and LPS6). Record the data in a table as below.
Here C1(P) and C2(P) are the total number of character
comparisons of the brute force program and the Rabin-Karp
program respectively.

CODE:

#include <iostream>
#include <string>
#include <string.h>
#include <cstdio>
#include <cstdlib>
#include <sys/time.h>
using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Character-comparison totals for the ten runs made by main():
// c1 -> brute force, c2 -> Rabin-Karp.
int c1[10], c2[10];

/*
 * Brute-force pattern search with comparison counting.
 *
 * s1 -> text
 * s2 -> pattern
 *
 * Tries every start position of s2 in s1 and returns the total number of
 * character comparisons made. Every comparison is counted, whether the two
 * characters were equal or not. Returns 0 when s2 is empty or longer than
 * s1.
 */
int bfp(string s1, string s2){
    const size_t n = s1.length();
    const size_t m = s2.length();
    int charCmp = 0;

    if (m == 0 || m > n)
        return charCmp;

    // Restarting from start + 1 after a partial match is what keeps
    // occurrences from being skipped.
    for (size_t start = 0; start + m <= n; start++) {
        for (size_t j = 0; j < m; j++) {
            charCmp++;
            if (s1[start + j] != s2[j])
                break;
        }
    }
    return charCmp;
}

/*
 * Rabin-Karp search with comparison counting.
 *
 * pat -> pattern
 * txt -> text
 * q   -> A prime number (the hash modulus)
 *
 * Returns the number of character comparisons made while verifying the
 * windows of txt whose hash equals the pattern hash. Every comparison is
 * counted, whether the two characters were equal or not. Returns 0 when q
 * is not positive, when pat is empty, or when pat is longer than txt.
 */
int rabin_karp_algo(string pat, string txt, int q){
    int charCmp = 0;
    const int M = static_cast<int>(pat.length());
    const int N = static_cast<int>(txt.length());

    // Without this guard the first-window hash below would index past the
    // end of txt, and q <= 0 would make the modulo undefined.
    if (q <= 0 || M == 0 || M > N)
        return charCmp;

    // 64-bit intermediates so the rolling update cannot overflow for any int q.
    long long p = 0; // hash value for pattern
    long long t = 0; // hash value for the current window of txt
    long long h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (int i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text.
    // Bytes are hashed as unsigned char so that characters >= 0x80 cannot
    // produce a negative pattern hash that never equals the rolled one.
    for (int i = 0; i < M; i++) {
        p = (d * p + static_cast<unsigned char>(pat[i])) % q;
        t = (d * t + static_cast<unsigned char>(txt[i])) % q;
    }

    // Slide the pattern over text one by one
    for (int i = 0; i <= N - M; i++) {
        // Check for characters one by one only when the hash values match.
        if (p == t) {
            for (int j = 0; j < M; j++) {
                charCmp++;
                if (txt[i + j] != pat[j])
                    break;
            }
        }

        // Calculate hash value for next window of text: remove leading
        // digit, add trailing digit
        if (i < N - M) {
            t = (d * (t - static_cast<unsigned char>(txt[i]) * h)
                 + static_cast<unsigned char>(txt[i + M])) % q;

            // We might get negative value of t, converting it to positive
            if (t < 0)
                t += q;
        }
    }
    return charCmp;
}

int main(){ string s1 = "a", s2


= "aaaa";

int n = 4, m = 0, an[10], am[10];

for(int i = 0; i < 10
;i++){ n = n + 2*i;
m=m+
i; an[i] =
n; am[i]
= m;
s1 = "GCATCGCAGAGAGTATACAGTACG", s2 =
"GCAGAGAG";
s1 = s1 + s2;
c1[i] =
bfp(s1,s2);
c2[i] =
rabin_karp
_algo(s2,s1
,101);

cout<<"S.No."<<"\t"<<"n"<<"\t"<<"m"<<"\t"<<"C1(p)
Comp."<<"\t"<<"C2(p)
Comp."<<endl;
for(int i = 0 ; i < 10 ;i++){
printf("%d\t%d\t%d\t%d\t\t%d\n",(i+1),an[i], am[i], c1[i],
c2[i]);
}
}
Output -

8. Using your program for LPS1 investigate for which values of the
input, the worst-case running time is achieved.
CODE:

#include <iostream>
#include <string.h>

using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Number of matches found by search(); accumulated across calls and
// printed by main().
int count = 0;

/*
 * Rabin-Karp search: adds to the global `count` the number of positions
 * (overlapping ones included) at which pat occurs in txt.
 *
 * pat -> pattern
 * txt -> text
 * q   -> A prime number (the hash modulus)
 *
 * Nothing is counted when q is not positive, when pat is empty, or when
 * pat is longer than txt.
 */
void search(char pat[], char txt[], int q)
{
    const int M = static_cast<int>(strlen(pat));
    const int N = static_cast<int>(strlen(txt));

    // Without this guard the first-window hash below would read past the
    // end of txt, and q <= 0 would make the modulo undefined.
    if (q <= 0 || M == 0 || M > N)
        return;

    // 64-bit intermediates so the rolling update cannot overflow for any int q.
    long long p = 0; // hash value for pattern
    long long t = 0; // hash value for the current window of txt
    long long h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (int i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text.
    // Bytes are hashed as unsigned char so that characters >= 0x80 cannot
    // produce a negative pattern hash that never equals the rolled one.
    for (int i = 0; i < M; i++) {
        p = (d * p + static_cast<unsigned char>(pat[i])) % q;
        t = (d * t + static_cast<unsigned char>(txt[i])) % q;
    }

    // Slide the pattern over text one by one
    for (int i = 0; i <= N - M; i++) {
        // Compare characters only when the hash values match.
        if (p == t && memcmp(txt + i, pat, M) == 0) {
            // Pattern found at index i. The name is qualified so it
            // cannot be confused with std::count.
            ::count++;
        }

        // Calculate hash value for next window of text: remove leading
        // digit, add trailing digit
        if (i < N - M) {
            t = (d * (t - static_cast<unsigned char>(txt[i]) * h)
                 + static_cast<unsigned char>(txt[i + M])) % q;

            // We might get negative value of t, converting it to positive
            if (t < 0)
                t += q;
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] =
"GEEK"; char
txt[100], pat[100];
scanf("%s", txt);
scanf("%s", pat);
// A prime
number int q =
101;

// function call
search(pat, txt,
q); cout<<count;
return 0;
}

OUTPUT:

9. How many spurious hits does the Rabin-Karp string matching


algorithm encounter in the text T = “3141512653849792” when
looking for all occurrences of the pattern P = “26”, working modulo
q = 11 and over the alphabet A= {0, 1, 2, . . . , 9}?
CODE:
#include <iostream>
#include <string.h>

using namespace std;

// d is the number of characters in the input alphabet. This exercise works
// over the digit alphabet {0, 1, ..., 9}, so the radix is 10 and each
// character contributes its digit value (c - '0') to the hash.
#define d 10

// count -> number of true matches found by search()
// sp    -> number of spurious hits: windows whose hash equals the pattern
//          hash but whose characters differ from the pattern
// Both accumulate across calls and are printed by main().
int count = 0, sp = 0;

/*
 * Rabin-Karp search that also counts spurious hits.
 *
 * pat -> pattern
 * txt -> text
 * q   -> A prime number (the hash modulus)
 *
 * Adds the number of occurrences of pat in txt to `count` and the number
 * of spurious hits to `sp`. Nothing is counted when q is not positive,
 * when pat is empty, or when pat is longer than txt.
 */
void search(char pat[], char txt[], int q)
{
    const int M = static_cast<int>(strlen(pat));
    const int N = static_cast<int>(strlen(txt));

    // Without this guard the first-window hash below would read past the
    // end of txt, and q <= 0 would make the modulo undefined.
    if (q <= 0 || M == 0 || M > N)
        return;

    // 64-bit intermediates so the rolling update cannot overflow for any int q.
    long long p = 0; // hash value for pattern
    long long t = 0; // hash value for the current window of txt
    long long h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (int i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text.
    // Each step is normalised into [0, q) so that a character below '0'
    // cannot leave a negative hash behind.
    for (int i = 0; i < M; i++) {
        p = ((d * p + (pat[i] - '0')) % q + q) % q;
        t = ((d * t + (txt[i] - '0')) % q + q) % q;
    }

    // Slide the pattern over text one by one
    for (int i = 0; i <= N - M; i++) {
        // Check for characters one by one only when the hash values match.
        if (p == t) {
            if (memcmp(txt + i, pat, M) == 0) {
                // Pattern found at index i. The name is qualified so it
                // cannot be confused with std::count.
                ::count++;
            } else {
                // Equal hashes but different characters: a spurious hit.
                ::sp++;
            }
        }

        // Calculate hash value for next window of text: remove leading
        // digit, add trailing digit
        if (i < N - M) {
            t = (d * (t - (txt[i] - '0') * h) + (txt[i + M] - '0')) % q;

            // We might get negative value of t, converting it to positive
            if (t < 0)
                t += q;
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] = "GEEK";
char txt[100],
pat[100];
scanf("%s", txt);
scanf("%s", pat);

// A prime
number int q =
11;

// function call
search(pat, txt, q);
cout<<"Ocuurence:
"<<count<<endl;
cout<<"Spurious hits
"<<sp; return 0;
}

Output –
10. Run LPS1 for the text abdcabcde and the pattern ab.
CODE:
#include <iostream>
#include <string.h>

using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Number of matches found by search(); accumulated across calls and
// printed by main().
int count = 0;

/*
 * Rabin-Karp search: adds to the global `count` the number of positions
 * (overlapping ones included) at which pat occurs in txt.
 *
 * pat -> pattern
 * txt -> text
 * q   -> A prime number (the hash modulus)
 *
 * Nothing is counted when q is not positive, when pat is empty, or when
 * pat is longer than txt.
 */
void search(char pat[], char txt[], int q)
{
    const int M = static_cast<int>(strlen(pat));
    const int N = static_cast<int>(strlen(txt));

    // Without this guard the first-window hash below would read past the
    // end of txt, and q <= 0 would make the modulo undefined.
    if (q <= 0 || M == 0 || M > N)
        return;

    // 64-bit intermediates so the rolling update cannot overflow for any int q.
    long long p = 0; // hash value for pattern
    long long t = 0; // hash value for the current window of txt
    long long h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (int i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text.
    // Bytes are hashed as unsigned char so that characters >= 0x80 cannot
    // produce a negative pattern hash that never equals the rolled one.
    for (int i = 0; i < M; i++) {
        p = (d * p + static_cast<unsigned char>(pat[i])) % q;
        t = (d * t + static_cast<unsigned char>(txt[i])) % q;
    }

    // Slide the pattern over text one by one
    for (int i = 0; i <= N - M; i++) {
        // Compare characters only when the hash values match.
        if (p == t && memcmp(txt + i, pat, M) == 0) {
            // Pattern found at index i. The name is qualified so it
            // cannot be confused with std::count.
            ::count++;
        }

        // Calculate hash value for next window of text: remove leading
        // digit, add trailing digit
        if (i < N - M) {
            t = (d * (t - static_cast<unsigned char>(txt[i]) * h)
                 + static_cast<unsigned char>(txt[i + M])) % q;

            // We might get negative value of t, converting it to positive
            if (t < 0)
                t += q;
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] = "GEEK";
char txt[100],
pat[100];
scanf("%s", txt);
scanf("%s", pat);

// A prime
number int q =
101;

// function call
search(pat, txt,
q); cout<<count;
return 0;
}
Output –

You might also like