Professional Documents
Culture Documents
String Matching
String Matching
Hà Nội, tháng 2 – 2015
Thuật toán
a. Đặc điểm
Thực hiện từ trái sang phải.
Có pha tiền xử lý với độ phức tạp O(m).
Độ phức tạp thuật toán là O(n + m).
b. Thuật toán
Thuật toán preMp:
Input:
Xâu mẫu x, độ dài m, mảng mpNext
Output:
Mảng mpNext chứa độ dài trùng nhau giữa tiền tố và hậu tố của xâu
Formats: preMp(char *x, int m, int mpNext[])
Actions:
// Builds the Morris-Pratt shift table for the pattern x of length m.
//
// On return mpNext[0] == -1 and, for 1 <= k <= m, mpNext[k] is the length of
// the longest proper border (a prefix that is also a suffix) of x[0..k-1].
//
// Parameters:
//   x       pattern; only x[0..m-1] is read.
//   m       pattern length.
//   mpNext  output table; the caller must provide room for m + 1 entries,
//           because index m is written on the last iteration.
void preMp(char *x, int m, int mpNext[]) {
    int i = 0;
    // j is the border length carried over from the previous prefix;
    // -1 means "no border, restart from the beginning of the pattern".
    int j = mpNext[0] = -1;
    while (i < m) {
        // Fall back through shorter and shorter borders until one can be
        // extended by x[i], or until none is left.
        while (j > -1 && x[i] != x[j]) {
            j = mpNext[j];
        }
        mpNext[++i] = ++j;
    }
}
EndActions.
Kiểm nghiệm preMp:
Xâu vào: ATCACATCATCA
Kết quả
c
mpNext[i]
ghi ch
-1
-1
-1
=>mpNext[1] =0
-1
-1
A
0
-1
A
10
11
Thuật toán MP:
Input:
Xâu mẫu X = (x0, x1, .., xm), độ dài m.
Văn bản Y = (y0, y1, .., yn), độ dài n.
Output:
Vị trí trùng nhau
Formats: MP(char *x, int m, char *y, int n)
Actions:
void MP(char *x, int m, char *y, int n) {
int i, j, mpNext[m];
/* Preprocessing */
preMp(x, m, mpNext);
for(int k =0;k<m;k++){
cout<<x[k]<<" "<<mpNext[k]<<endl;
}
/* Searching */
i = j = 0;
while (j < n) {
=>mpNext[++9] = 1++
x = ATCACATCATCA
Kim nghim MP :
j
y[j]
x[i]
ghi ch
trng
trng
trng
trng
trng
trng
trng
trng
trng
10
trng
11
trng
12
trng
13
trng
14
10
trng
15
11
trng
1.2
a. Đặc điểm
Thực hiện từ trái sang phải.
Có pha tiền xử lý với độ phức tạp O(m).
Độ phức tạp thuật toán là O(n + m).
b. Thuật toán
Thuật toán PreKmp: // thực hiện bước tiền xử lý xác định số ký tự có tiền tố và hậu tố trùng nhau
Input :
Xu mu X =(x0, x1,..,xm), di m.
Output: Mng gi tr kmpNext[].
Formats:
PreKmp(X, m, kmpNext);
Actions:
// Pseudo-code body of PreKmp: fills kmpNext[0..m-1] for the pattern X.
// len is the length of the border currently being extended; i is the
// position in X whose table entry is computed next.
i = 1; kmpNext[0] = 0; len = 0; // kmpNext[0] is always 0
while (i < m) {
if (X[i] == X[len] ) { // X[i] extends the current border
len++;
kmpNext[i] = len;
i++;
}
else { // X[i] does not extend the current border
// Retry with the next shorter border (i is not advanced); when no border
// is left, record 0 for position i and move on.
if ( len != 0 ) { len = kmpNext[len-1]; }
else { kmpNext[i] = 0; i++; }
}
}
EndActions.
Kiểm nghiệm PreKmp(X, m, kmpNext):
X[] = ABABCABAB, m = 9.
i=?
(X[i]== X[len])?
Len =?
kmpNext[i]=?
Len =0
kmpNext[0]=0
i=1
(B==A): No
Len =0
kmpNext[1]=0
i=2
(A==A): Yes
Len =1
kmpNext[2]=1
i=3
(B==B): Yes
Len=2
kmpNext[3]=2
i=4
(C==A): No
Len=0
kmpNext[4]=0
i=5
(A==A): Yes
Len=1
kmpNext[5]=1
i=6
(B==B): Yes
Len=2
kmpNext[6]=2
i=7
(A==A): Yes
Len=3
kmpNext[7]=3
i=8
(B==B): Yes
Len=4
kmpNext[8]=4
X[] = ABABCABAB, m = 9.
Y[] = ABABDABACDABABCABAB, n = 19
Bước 1 (Tiền xử lý). Thực hiện PreKmp(X, m, kmpNext) ta nhận được:
kmpNext[] = { 0, 0, 1, 2, 0, 1, 2, 3, 4}
Bước 2 (Lặp):
(X[j]==Y[i])?
(J ==9)?
I =? J = ?
(X[0]==Y[0]): Yes
No
i=1, j=1
(X[1]==Y[1]): Yes
No
i=2, j=2
(X[2]==Y[2]): Yes
No
i=3, j=3
(X[3]==Y[3]): Yes
No
i=4, j=2
(X[2]==Y[4]): No
No
i=4, j=0
(X[0]==Y[4]): No
No
i=5, j=0
(X[0]==Y[5]): Yes
No
i=6, j=1
(X[1]==Y[6]): Yes
No
i=7, j=2
(X[2]==Y[7]): Yes
No
i=7, j=2
(X[1]==Y[8]): No
No
i=7, j=2
(X[0]==Y[8]): No
No
i=7, j=2
(X[0]==Y[9]): No
No
i=7, j=2
(X[0]==Y[10]): Yes
No
i=7, j=2
(X[1]==Y[11]): Yes
No
i=7, j=2
(X[2]==Y[12]): Yes
No
i=7, j=2
(X[3]==Y[13]): Yes
No
i=7, j=2
1.3
(X[4]==Y[14]): Yes
No
i=7, j=2
(X[5]==Y[15]): Yes
No
i=7, j=2
(X[6]==Y[16]): Yes
No
i=7, j=2
(X[7]==Y[17]): Yes
No
i=7, j=2
(X[8]==Y[18]): Yes
Yes
i=19, j=4
a. Đặc điểm
Thực hiện từ trái qua phải
Sử dụng một hàm hash
Độ phức tạp của pha tiền xử lý là O(m)
Độ phức tạp của pha tìm kiếm là O(m × n)
Thời gian chạy mong muốn là O(m + n)
b. Thuật toán
Thuật toán Karp-Rabin:
Input:
Xâu mẫu pat độ dài M
Văn bản nguồn txt độ dài N
Output:
Các vị trí trùng nhau
Formats: search()
Actions:
private static long hash(String key, int M) hm tnh gi tr hash ban u
{
long h = 0;
long d1=d;
for (int j = 0; j < M; j++)
{
h = ( key.charAt(j)*d1)+h ;
d1=d1/2;
}
return h%Q ;
}
/**
 * Updates a window hash in constant time when the window slides one
 * character to the right.
 *
 * @param a character leaving the window (its contribution a*d is removed)
 * @param b character entering the window
 * @param h hash of the current window
 * @return hash of the shifted window, normalised into [0, Q)
 */
private static long REHASH(char a, char b, long h) {
    h = ((h - a * d) * 2 + b) % Q;
    // Java's % keeps the sign of the dividend, and h - a*d is negative
    // whenever h has already been reduced below a*d. A negative result could
    // never equal the non-negative value returned by hash(), so fold it back
    // into the non-negative range (assumes Q > 0).
    if (h < 0) {
        h += Q;
    }
    return h;
}
17819
17533
17979
10
19389
11
17339
12
17597
13
17102
14
17117
15
17678
16
17245
10
17
17917
1.4
11
18
17723
12
19
18877
a. Đặc điểm
Đây là thuật toán xử lý bit
Có hiệu quả khi độ dài mẫu nhỏ hơn giới hạn vật lý (kích thước từ máy)
Pha tiền xử lý có độ phức tạp không gian và thời gian là O(m + σ)
Pha tìm kiếm có độ phức tạp là O(n)
b. Thuật toán
00000010
10101001
/* Preprocessing */
lim = preSo(x, m, S);
/* Searching */
for (state = ~0, j = 0; j < n; ++j) {
state = (state<<1) | S[y[j]];
System.out.println (Integer.toBinaryString(state));
if (state < lim) //do dai thoa man xau mau so 0 dich den vi tri cua lim
{
System.out.println(Integer.toBinaryString(lim));
System.out.println("vi tri trung nhau "+ (j-m+1));
}
}
}
EndActions.
Kim nghim SO:
j
c = y[j]
state
11111111111111111111111111111110
11111111111111111111111111111101
11111111111111111111111111111011
11111111111111111111111111111111
11111111111111111111111111111111
11111111111111111111111111111110
11111111111111111111111111111101
11111111111111111111111111111011
11111111111111111111111111110110
10
11111111111111111111111111101111
11
11111111111111111111111111011110
12
11111111111111111111111110111111
13
11111111111111111111111101111110
14
11111111111111111111111110000000
Đặc điểm:
Thực hiện từ trái qua phải
Có pha tiền xử lý với độ phức tạp O(m)
Độ phức tạp thuật toán là O(n)
Xu mu X =(x0, x1,..,xm), di m.
Output: Mng gi tr kmpNext[].
Formats:
preKmp(X, m, kmpNext[]);
Actions:
// Pseudo-code body of preKmp: fills kmpNext[0..m] for the pattern x.
// j tracks the border length of the prefix processed so far; -1 means
// "no border".
i = 0;
j = kmpNext[0] = -1;
while (i < m) {
// Fall back through shorter borders until x[i] extends one.
while (j > -1 && x[i] != x[j])
j = kmpNext[j];
i++;
j++;
// i was already advanced, so on the last pass x[m] is read here.
// If the character after the border equals the character at i, a mismatch
// at i would repeat at j, so the entry reuses kmpNext[j] instead of j.
if (x[i] == x[j])
kmpNext[i] = kmpNext[j];
else
kmpNext[i] = j;
}
EndActions.
Kim nghim PreKmp(X, m, kmpNext[]) vi X[]=GCAGAGAG, m=8.
i=?
(i==8)?
( X[i] == X[j] ) ?
kmpNext[i] = ?
j = -1
kmpNext[0] = 0
(C==G) : False
j=0
kmpNext[1] = 0
(A==G) : False
j=0
kmpNext[2] = 0
(G==G) : True
j=1
kmpNext[3] = 1
(A==C) : False
j=0
kmpNext[4] = cha xc nh
(A==G) : False
j=0
kmpNext[4] = 0
(G==G) : True
j=1
kmpNext[5] = 1
(A==C) : False
j=0
kmpNext[6] = cha xc nh
(A==G) : False
j=0
kmpNext[6] = 0
(G==G) : True
j=1
kmpNext[7] = 1
Kt lun : kmpNext = {0, 0, 0, 1, 0, 1, 0, 1}
1
2
3
4
4
5
6
6
7
Xu mu X =(x0, x1,..,xm), di m.
Vn bn Y =(y0, y1,..,xn), di n.
Output:
j=?
i = ell;
}
else {
k = ell;
i = kmpNext[i];
}
}
EndActions.
(i>=m)?
(k>=ell)?
X[3]==Y[3] : false
X[1]==Y[3] : false
X[2]==Y[5] : false
X[7]==Y[12] : true
X[1]==Y[14] : false
X[1]==Y[15] : false
X[1]==Y[16] : false
X[4]==Y[20] : false
False
False
False
true
False
False
False
False
False
False
False
true
False
False
False
False
Đặc điểm:
Thực hiện từ trái qua phải
Độ phức tạp thuật toán là O(n*m)
Thực hiện so sánh theo thứ tự vị trí 1, 2, 3, ..., m-2, m-1, 0.
j
0
2
3
5
13
14
15
16
20
i
1
1
1
1
1
1
1
1
1
k
0
1
0
0
0
0
0
0
0
Xu mu X =(x0, x1,..,xm), di m.
Vn bn Y =(y0, y1,..,xn), di n.
Output:
X[0] == Y[j]
(C==C) : true
(C==T) : false
(C==C) : true
(C==C) : true
(C==G) : false
(C==A) : false
(C==G) : false
(C==A) false
(C==G) : false
(C==T) : false
(C==A) : false
(C==T) : false
(C==A) : false
(C==C) : true
False
True
False
True
False
True
False
False
j
0
2
3
5
7
8
9
10
11
12
13
14
15
16
18
a. Đặc điểm
Thực hiện từ phải qua trái
Pha tiền xử lý có độ phức tạp thời gian và không gian là O(m + σ)
Pha tìm kiếm có độ phức tạp thời gian là O(mn)
b. Thuật toán
}
EndActions.
X[]=GCAGAGAG, m = 8
i
X[i]
bmBc[x[i]]
bmBc[G]=7
bmBc[C]=6
bmBc[A]=5
bmBc[G]=4
bmBc[A]=3
bmBc[G]=2
bmBc[A]=1
bmBc[c]
//xac dinh hau to chung dai nhat giua xau x[0 ... x] va x[0 ... m-
int f, g, i;
suff[m - 1] = m;
//i vi tri xet
g = m - 1;
for (i = m - 2; i >= 0; --i) {
cout<<"bat dau for"<<i<<" "<<g<<" "<<f<<endl;
if (i > g && suff[i + m - 1 - f] < i - g)
{
//sau da trung nhau r
suff[i] = suff[i + m - 1 - f];
cout<<"vao nhanh 1 "<<suff[i]<<" "<<i + m - 1 - f<<" "<<f<<endl;
}
else {
if (i < g)
g = i;
f = i;
while (g >= 0 && x[g] == x[g + m - 1 - f]) //neu trung nhau thi dua do dai trung vao
--g;
suff[i] = f - g;
cout<<"vao nhanh 2 "<<suff[i]<<endl;
}
}
}
EndActions.
Kim nghim suffixes:
X[]=GCAGAGAG, m = 8
(i>g)?
Suff(i)
False
False
True
True
True
False
True
True
False
False
Ktlun : suff[]={1, 0, 0, 2, 0, 4, 0, 8}
X[]=GCAGAGAG, m = 8
( suff(i) == (i+1) )?
False
False
False
False
False
False
False
True
( bmGs[j] == m )?
bmGs[j]
True
True
True
True
True
True
true
Suff(i)
bmGs[m-1-suff(i)]
bmGs[6] = 7
bmGs[7] = 6
bmGs[7] = 5
bmGs[5] = 4
bmGs[7] = 3
bmGs[3] = 2
bmGs[7] = 1
Thuật toán BM:
Input:
Xâu mẫu x độ dài m
Văn bản y độ dài n
Output:
Vị trí trùng nhau
Formats: BM(char *x, int m, char *y, int n)
Actions:
EndActions.
// Boyer-Moore search: prints (via cout) every position j at which the
// pattern x[0..m-1] occurs in the text y[0..n-1].
//
// Parameters:
//   x, m  pattern and its length.
//   y, n  text and its length.
//
// Relies on preBmGs / preBmBc to fill the good-suffix and bad-character
// shift tables, and on ASIZE being the alphabet size.
void BM(char *x, int m, char *y, int n) {
    // An empty pattern would make bmGs a zero-length array and bmGs[0]
    // an out-of-bounds read.
    if (m <= 0)
        return;

    int i, j, bmGs[m], bmBc[ASIZE];

    /* Preprocessing */
    preBmGs(x, m, bmGs);
    preBmBc(x, m, bmBc);

    /* Searching */
    j = 0;
    while (j <= n - m) {
        // Compare right to left; the loop stops at the first mismatch, or
        // with i == -1 when the whole window matched.
        for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
        if (i < 0) {
            // The whole pattern matched at position j.
            cout<<j;
            j += bmGs[0];
        }
        else {
            // Index the table through unsigned char: plain char may be
            // signed, which would yield a negative index for bytes >= 0x80.
            const unsigned char bad = static_cast<unsigned char>(y[i + j]);
            // Take the larger of the good-suffix and bad-character shifts.
            j += max(bmGs[i], bmBc[bad] - m + 1 + i);
        }
    }
}
suff[]={1, 0, 0, 2, 0, 4, 0, 8}
bmGs[] = {7, 7, 7, 2, 7, 4, 7, 1}
bmBc[]
c
bmBc[c]
i<0
J+=max(BmGs[i
], bmBc[y[i + j]]
- m + 1 + i)<n-m
BmGs[i]
BmBc[y[i+j]]
bmBc[y[i + j]] - m +
1+i
x[i] == y[i + j]
7(no)
0(yes)
x[7]!=y[7] (G!
=A)
7(no)
1(yes)
x[7]==y[8] (G!
=G)
6(no)
1(yes)
x[6]==y[7] (A!
=A)
5(no)
1(yes)
7(no)
5(yes)
x[7]==y[12]
(G==G)
6(no)
5(yes)
x[6]==y[11]
(A==A)
5(no)
5(yes)
x[5]==y[10]
(G==G)
4(no)
5(yes)
x[4]==y[9]
(A==A)
3(no)
5(yes)
x[3]==y[8]
(G==G)
2(no)
5(yes)
x[2]==y[7]
(A==A)
1(no)
5(yes)
x[1]==y[6]
(C==C)
0(no)
5(yes)
x[0]==y[5]
(G==G)
-1(yes)
5(yes)
x[2]==y[12]
(A==A)
12(no)
x[5]!=y[6] (G!
=C)
Xu mu X =(x0, x1,..,xm), di m.
Output: mng gi tr bmBc[].
Formats:
preBmBc(x, m, bmBc);
Actions:
// Pseudo-code body of preBmBc: fills the bad-character shift table.
// Every character starts with the default shift m.
for (i = 0; i < ASIZE; ++i){
bmBc[i] = m;
}
cout<<endl;
// For every pattern character except the last one, store the distance from
// its position to the end of the pattern; a later occurrence of the same
// character overwrites the earlier one.
for (i = 0; i < m - 1; ++i){
bmBc[x[i]] = m - i - 1;
}
EndActions.
Kim nghim preBmBc(X, m, bmBc) :
X[]=GCAGAGAG, m = 8
i
0
1
2
3
4
5
6
X[i]
G
C
A
G
A
G
A
bmBc[x[i]]
bmBc[G]=7
bmBc[C]=6
bmBc[A]=5
bmBc[G]=4
bmBc[A]=3
bmBc[G]=2
bmBc[A]=1
x
bmBc[c]
Thut ton suffixes :
A
1
Input :
Xu mu X =(x0, x1,..,xm), di m.
Output: mng gi tr suff[].
Formats:
Suff(X, m, suff);
Actions:
// Pseudo-code body of suffixes: fills suff[0..m-1], working from the right
// end of the pattern x towards index 0.
// The last position always gets m.
suff[m - 1] = m;
g = m - 1;
for (i = m - 2; i >= 0; --i) {
// On the first pass i == m-2 and g == m-1, so "i > g" is false and f is not
// read before the else branch assigns it.
if (i > g && suff[i + m - 1 - f] < i - g){
// Reuse a value that was already computed.
suff[i] = suff[i + m - 1 - f];
}
else {
if (i < g)
g = i;
f = i;
// Move g left while x[g] equals the character at the same offset from the
// end of the pattern; f - g is then the number of matched characters.
while (g >= 0 && x[g] == x[g + m - 1 - f])
--g;
suff[i] = f - g;
}
}
EndActions.
G
2
C
6
(i>g)?
False
False
True
True
True
False
False
g
7
6
1
True
1
False
1
True
1
1
0
Kt lun : suff[]={1, 0, 0, 2, 0, 4, 0, 8}
Xu mu X =(x0, x1,..,xm), di m.
Output: mng gi tr bmGs[].
Formats:
preBmGs(x, m, bmGs[]);
Actions:
suffixes(x, m, suff);
for (i = 0; i < m; ++i)
bmGs[i] = m;
j = 0;
for (i = m - 1; i >= 0; --i)
if (suff[i] == i + 1)
for (; j < m - 1 - i; ++j)
if (bmGs[j] == m)
bmGs[j] = m - 1 - i;
for (i = 0; i <= m - 2; ++i)
f
0
6
5
5
3
3
1
0
Suff(i)
8
0
4
0
2
0
0
1
bmGs[m - 1 - suff[i]] = m - 1 - i;
EndActions.
( suff(i) == (i+1) )?
False
False
False
False
False
False
False
True
i
0
1
2
3
4
5
6
( bmGs[j] == m )?
bmGs[j]
0
1
2
3
4
5
6
True
True
True
True
True
True
true
7
7
7
7
7
7
7
Suff(i)
bmGs[m-1-suff(i)]
1
bmGs[6] = 7
0
bmGs[7] = 6
0
bmGs[7] = 5
2
bmGs[5] = 4
0
bmGs[7] = 3
4
bmGs[3] = 2
0
bmGs[7] = 1
Kt lun : bmGs[] = {7, 7, 7, 2, 7, 4, 7, 1}
Xu mu X =(x0, x1,..,xm), di m.
Vn bn Y =(y0, y1,..,xn), di n.
Output:
turboShift = u - v;
bcShift = bmBc[y[i + j]] - m + 1 + i;
shift = max(turboShift, bcShift);
shift = max(shift, bmGs[i]);
if (shift == bmGs[i])
u = min(m - shift, v);
else {
if (turboShift < bcShift)
shift = max(shift, u + 1);
u = 0;
}
}
j += shift;
}
Kim nghim TBM(X, m, Y, n) :
Y[]=GCATCGCAGAGAGTATACAGTACG, n = 24
X[]=GCAGAGAG, m = 8
Bc 1(Tin x l) : thc hin preBmGs(X, m , bmGs), preBmBc(X, m, bmBc) ta nhn c :
bmGs[] = {7, 7, 7, 2, 7, 4, 7, 1}
x
bmBc[c]
A
1
G
2
C
6
Bc 2 (Lp) :
i
( i < 0 )?
turboShift
bcShift
7
5
false
false
0
2
0
-2
1
4
shift
8
1
4
(shift==bmGs[i])?
True
true
u
0
0
2
j
0
1
5
-1
5
6
true
false
false
2
1
7
-1
0
4
true
1
1
7
true
Kết quả: X xuất hiện trong Y tại vị trí 5.
a. Đặc điểm
Sử dụng một danh sách (bucket) chứa vị trí của mỗi ký tự
Pha tiền xử lý có độ phức tạp thời gian và độ phức tạp không gian là O(m + σ)
Pha tìm kiếm có độ phức tạp thời gian là O(mn)
O(n) thể hiện so sánh ký tự
b. Thuật toán
1
2
1
12
16
23
{
for (ptr = z[y[j]]; ptr != NULL; ptr = ptr->next)
{
cout<<ptr->element<<" ";
if (memcmp(x, y + j - ptr->element, m) == 0)//dich sao cho ky tu trung nhau ca *x va *y roi so sanh
}
}
}
EndActions.
Kim nghim SKIP :
Pha tin x l
s biu thi cho th t node
i
ptr->next
ptr->element
z[x[i]]
Kt qu
c
z[c]
6,4,2
7,5,3,0
khng c
Pha tm kim
j
y[j]
ptr->element
ghi ch
4
2
15
khong co
23
7
5
3
0
23+8 >y.length dng
a. Đặc điểm
Một dạng đơn giản của thuật toán Boyer-Moore
Pha tiền xử lý có độ phức tạp thời gian là O(m + σ), độ phức tạp không gian là O(σ)
Pha tìm kiếm có độ phức tạp thời gian là O(mn)
b. Thuật toán
//tien xu ly bang
/* Searching */
j = 0;
while (j <= n - m) {
c = y[j + m - 1];
if (x[m - 1] == c && memcmp(x, y + j, m - 1) == 0)
cout<<j;
j += bmBc[c];
}
}
EndActions.
Kim nghim HORSPOOL:
Y[]="ABABDABACDABAACABABE" n=20
X[]="ABAACABAB" m=9
BmBc[]
A
bmbc
J=J+BmBc[c]<nm=11?
0(yes)
4(yes)
5(yes)
7(yes)
8(yes)
10(yes)
C= y[j + m - 1]
x[m-1]=x[8]=C?
So snh mng x v y t v tr j
ti j+m-1
BmBc[c]
Y[8]=C
B(no)
ABAACABA vs
ABABDABA (no)
Y[12]=A
B(no)
ABAACABA vs
DABACDAB (no)
Y[13]=B
B(yes)
ABAACABA vs
ABACDABA (no)
Y[15]=A
B(no)
ABAACABA vs
ACDABAAC (no)
Y[16]=B
B(yes)
ABAACABA vs
CDABAACA (no)
Y[18]=B
B(no)
ABAACABA vs
ABAACABA (yes)
12(no)
a. Đặc điểm
Thực hiện dịch giống thuật toán Horspool
Pha tiền xử lý có độ phức tạp thời gian là O(m + σ), độ phức tạp không gian là O(σ)
Pha tìm kiếm có độ phức tạp thời gian là O(mn)
b. Thuật toán
Thuật toán preBmBc:
Input:
Xâu mẫu x
Độ dài xâu mẫu m
Output:
Mảng bmBc[] là mảng chứa vị trí xuất hiện cuối cùng của các ký tự trong xâu x
Formats:
void preBmBc(char *x, int m, int bmBc[])
Actions:
// Fills the bad-character shift table for the pattern x of length m.
//
// Parameters:
//   x     pattern; x[0..m-2] is read (the last character is skipped).
//   m     pattern length.
//   bmBc  output table with ASIZE entries. Characters that do not occur in
//         x[0..m-2] keep the default shift m; every other character gets the
//         distance from its last such occurrence to the end of the pattern.
void preBmBc(char *x, int m, int bmBc[]) {
    int i;
    for (i = 0; i < ASIZE; ++i)
        bmBc[i] = m;
    for (i = 0; i < m - 1; ++i) {
        // Index through unsigned char: plain char may be signed, which would
        // turn bytes >= 0x80 into negative (out-of-bounds) indices.
        bmBc[static_cast<unsigned char>(x[i])] = m - i - 1;
    }
}
EndActions.
Kim nghim preBmBc :
v d :
c ang xt
v tr cui cng
G
T
c = y[j + m - 1];
if (lastCh == c && middleCh == y[j + m/2] &&
firstCh == y[j] &&
memcmp(secondCh, y + j + 1, m - 2) == 0)
cout<<j;
j += bmBc[c];
}
}
EndActions.
Kim nghim Raita:
j
y[i+m-1]
0
y[j + m/2]
y[j]
ghi ch
A != lastCh
dch 1 (bmBc[A])
trng
C != firstCh
dch 2 (bmBc[G])
G
T
T!= firstCh
dch 2 (bmBc[G])
G
G
G
ht
dch 2 (bmBc[G])
c im:
Xu mu X =(x0, x1,..,xm), di m.
Output: Mng gi tr QsBc [].
Formats:
preQsBc(char *x, int m, int qsBc[])
Actions:
// Pseudo-code body of preQsBc: fills the Quick Search shift table.
int i;
// Every character starts with the default shift m + 1.
for (i = 0; i < ASIZE; ++i)
qsBc[i] = m + 1;
// Unlike preBmBc, every pattern character (including the last) is recorded;
// a later occurrence of the same character overwrites the earlier one.
for (i = 0; i < m; ++i){
qsBc[x[i]] = m - i;
}
EndActions.
Xu mu X =(x0, x1,..,xm), di m.
Vn bn Y =(y0, y1,..,xn), di n.
Output:
Actions:
// Quick Search: reports, through OUTPUT, every position j at which the
// pattern x[0..m-1] occurs in the text y[0..n-1].
//
// Parameters:
//   x, m  pattern and its length.
//   y, n  text and its length. y[n] is read when the last window is shifted,
//         so it must be addressable (e.g. the terminating '\0' of a C string).
void QS(char *x, int m, char *y, int n) {
    int j, qsBc[ASIZE];

    /* Preprocessing */
    preQsBc(x, m, qsBc);

    /* Searching */
    j = 0;
    while (j <= n - m) {
        // Compare the pattern with the m text characters starting at j.
        if (memcmp(x, y + j, m) == 0)
            OUTPUT(j);
        // The shift is chosen by the character just past the window. Index
        // through unsigned char: plain char may be signed, which would yield
        // a negative index for bytes >= 0x80.
        j += qsBc[static_cast<unsigned char>(y[j + m])];
    }
}
EndActions.
Kim nghim:
Y[]="ABABDABACDABAACABABE" n=20
X[]="ABAACABAB" m=9
BmBc[] ngoi A B C cn li = m+1 =10
A
bmbc
J=J+ qsBc[y[j +
m]]<n-m=11?
y[j + m]
0(yes)
Y[9]=D
qsBc[D]=10
10(yes)
Y[19]=E
qsBc[E]=10
20(no)
Code:
#include<stdio.h>
#include<conio.h>
#include<string.h>
#include<stdlib.h>
#include<iostream>
qsBc[y[j + m]]
Xu mu X =(x0, x1,..,xm), di m.
Output: Mng gi tr BmBc[].
Formats:
EndActions.
Thut ton Reverse Factor Algorithm:
Input :
Xu mu T=(t0,t1,,tm), di m.
Vn bn P=(p0,p1,,pm), di n.
Output:
d.push_back(0);
}
}
l++;
if(d[0]==1){
a=l;
}
if(kt(d,n)){
return a;
}
for(int i=m-2;i>=0;i--){
for(int j=0;j<=n-1;j++){
if(x[i]==y[j]&&d[j+1]==1){
d.insert(d.begin()+j,1);
d.erase(d.begin()+j+1);
}
else{
d.insert(d.begin()+j,0);
d.erase(d.begin()+j+1);
}
}
l++;
if(d[0]==1){
a=l;
}
if(kt(d,n)){
return a;
}
}
return a;
}
void xuli(){
int i=0,a=0;
while(i<=n-m){
string w=t.substr(i,m);
int k=LSP(w,p);
if(k==m){
cout<<i<<endl;
w.erase(w.begin());
a=m-LSP(w,p);
}
else{
a=m-k;
}
i=i+a;
}
}
// Program entry point: reads the input with init(), then runs the search.
// Declared with an explicit int return type, which standard C++ requires.
int main(){
    init();
    xuli();
    return 0;
}
Kim nghim :
T=GCATCGCAGAGAGTATACAGTACG
P=GCAGAGAG
i
0
0+5=5
5+7=12
12+8=20>n-m(kt thc)
LSP
3
8(bng m)
0
shift
8-3=5
8-1=7
8-0=8
Input :
Xu mu X =(x0, x1,..,xm), di m.
Vn bn Y =(y0, y1,..,xn), di n.
Output:
Actions:
Void ZT(char *x, int m, char *y, int n){
Int i,j, ztBc[ASIZE][ASIZE], bmGs[XSIZE];
preZtBc(x,m,ztBc);
preBmGs(x,m,bmGs);
j=0;
while(j<=n-m){
i=m-1;
while(i<m&&x[i]==y[i+j])-- i;
if(i<0){
OUTPUT(j);
j + =bmGs[0]:
out
No
Yes(5)
No
}else{
j + = MAX(bmGs[i], ztBc[y[j + m -2]][y[j+ m-1]]);
}
}
}
Code :
#include<iostream>
#include<vector>
using namespace std;
string t,p;
int m,n;
vector<int>bmGs;
vector<int>suff;
int ztBc[256][256];
// Reads the text and then the pattern from standard input into the globals
// t and p, and caches their lengths in n and m.
void init(){
    cin >> t;
    cin >> p;
    n = t.length();
    m = p.length();
}
// Fills the global table suff for the pattern x of length m, working from
// the right end of the pattern towards index 0.
//
// Parameters:
//   x  pattern.
//   m  pattern length; the global suff must already hold at least m entries.
void suffixes(string x,int m) {
    // With m <= 0 the first store below would write suff[-1].
    if (m <= 0)
        return;

    // f is only read once the else branch has assigned it (on the first pass
    // "i > g" is false), but it is initialised to keep that invariant
    // independent of evaluation order.
    int f = 0;
    int g = m - 1;
    suff[m - 1] = m;
    for (int i = m - 2; i >= 0; --i) {
        if (i > g && suff[i + m - 1 - f] < i - g) {
            // Reuse a value that was already computed.
            suff[i] = suff[i + m - 1 - f];
        } else {
            if (i < g)
                g = i;
            f = i;
            // Move g left while x[g] equals the character at the same offset
            // from the end of the pattern.
            while (g >= 0 && x[g] == x[g + m - 1 - f])
                --g;
            suff[i] = f - g;
        }
    }
}
int i,j;
for(int i=0;i<256;i++){
for(int j=0;j<256;j++){
ztBc[i][j]=m;
}
}
for(int i=0;i<256;i++){
ztBc[i][p[0]]=m-1;
}
for(int i=1;i<m-1;i++){
ztBc[p[i-1]][p[i]]=m-1-i;
}
}
// Compares w against p from right to left over p's length.
// Returns the index of the rightmost position where they differ, or -1 when
// all compared positions are equal. w must be at least as long as p.
int kt(string w, string p){
    int idx = p.length();
    --idx;
    while (idx >= 0 && w[idx] == p[idx]) {
        --idx;
    }
    return idx;
}
void xuli(){
int i=0;
while(i<=n-m){
string w=t.substr(i,m);
int k=kt(w,p);
if(k==-1){
cout<<i<<endl;
i+=bmGs[0];
}else{
i+=max(bmGs[k],ztBc[(int)w[m-2]][(int)w[m-1]]);
}
}
}
// Program entry point: reads the input, builds the suffix, good-suffix and
// Zhu-Takaoka tables for the pattern p, then runs the search.
// Declared with an explicit int return type, which standard C++ requires.
int main(){
    init();
    // suffixes() writes suff[0..m-1] by index, so the vector must already
    // hold m entries.
    for(int i=0;i<m;i++){
        suff.push_back(-1);
    }
    suffixes(p,m);
    preBmGs(p,m);
    preZtBc(p,m);
    xuli();
    return 0;
}
Kim nghim :
Y=GCATCGCAGAGAGTATACAGTACG
X=GCAGAGAG
ztBc
A
C
G
T
A
8
5
1
8
C
8
8
6
8
G
2
7
7
7
T
8
8
8
8
i
X[i]
bmGs[i]
0
G
7
1
C
7
2
A
7
i
0
0+5=5
5+7=12
12+4=18
3
G
2
4
A
7
5
G
4
k
7
-1
5
6
6
A
7
7
G
1
shift
5
7
4
7
Xu mu X =(x0, x1,..,xm), di m.
Vn bn Y =(y0, y1,..,xn), di n.
Output:
Actions:
Void preBrBc(char *x, int m, int brBc[ASIZE][ASIZE]){
int a,b,i ;
for(a=0; a<ASIZE; ++a)
for(b=0; b<ASIZE; ++b)
brBc[a][b]=m+2;
for(a=0; a<ASIZE; ++a)
brBc[a][x[0]]=m+1;
for(i=0; i<m-1;++i)
brBc[x[i]][x[i+1]]=m-i;
for(a=0; a<ASIZE; ++a)
output
no
Yes(5)
No
No
brBc[x[m-1]][a]=1;
}
Void BR(char *x, int m, char *y, int n){
int j, brBc[ASIZE][ASIZE] ;
preBrBc(x, m, brBc);
y[n+1]=\0;
j=0;
while(j<=n-m){
if(memcmp(x,y+j,m)==0) OUTPUT(j);
j+=brBc[y[j+m]][y[j+m+1]];
}
}
Thuật toán Tuned Boyer-Moore:
Input:
Xâu mẫu X = (x0, x1, .., xm), độ dài m.
Văn bản Y = (y0, y1, .., yn), độ dài n.
Output:
Actions:
// Tuned Boyer-Moore: reports, through OUTPUT, every position j at which the
// pattern x[0..m-1] occurs in the text y[0..n-1].
//
// Parameters:
//   x, m  pattern and its length.
//   y, n  text and its length. The buffer must be WRITABLE and hold at least
//         n + m characters: m copies of the last pattern character are
//         written after the text as sentinels.
void TUNEDBM(char *x, int m, char *y, int n) {
    int j, k, shift, bmBc[ASIZE];

    // x[m - 1] below would be out of bounds for an empty pattern.
    if (m <= 0)
        return;

    /* Preprocessing */
    preBmBc(x, m, bmBc);
    // Index the table through unsigned char: plain char may be signed, which
    // would yield a negative index for bytes >= 0x80.
    const unsigned char last = static_cast<unsigned char>(x[m - 1]);
    shift = bmBc[last];
    // A zero entry for the last pattern character lets the skip loop below
    // stop as soon as the window's last character matches.
    bmBc[last] = 0;
    // The sentinels guarantee that the skip loop terminates.
    memset(y + n, x[m - 1], m);

    /* Searching */
    j = 0;
    while (j < n) {
        k = bmBc[static_cast<unsigned char>(y[j + m - 1])];
        while (k != 0) {
            // Three skips per pass (loop unrolled).
            j += k; k = bmBc[static_cast<unsigned char>(y[j + m - 1])];
            j += k; k = bmBc[static_cast<unsigned char>(y[j + m - 1])];
            j += k; k = bmBc[static_cast<unsigned char>(y[j + m - 1])];
        }
        // The window must lie entirely inside the real text. Testing only
        // j < n would accept windows that overlap the sentinel padding and
        // report occurrences that are not in y (e.g. "aa" in "ba").
        if (j <= n - m && memcmp(x, y + j, m - 1) == 0)
            OUTPUT(j);
        j += shift;
        /* shift */
    }
}
EndActions.
Kim nghim:
Y[]="ABABDABACDABAACABABE" n=20
X[]="ABAACABAB" m=9
BmBc[] cc gi tr khc = m=9
A
bmbc
Giai on u
shift=BmBc[ x[m - 1]]=BmBc[x[8]]= BmBc[B];
BmBc[ x[m - 1]]=BmBc[x[8]]= BmBc[B]=0;
C li mng BmBc[]
bmbc
J+=shift
<n=20?
k = bmBc[y[j + m
-1]]
Nu k!=0
lp
0(yes)
k=bmBc[Y[8]]=
bmBc[C]=4
Lp
J+=k;
k = bmBc[y[j
+ m -1]]
J=0+4
=4
k=
bmBc[y[12]]=
bmBc[A]=1
So snh mng x v y t v tr
j vi m-1phn t v j>n?
J=4+1
=5
k=
bmBc[y[13]]=
bmBc[B]=0
J=5+0
=5
k=
bmBc[y[13]]=
bmBc[B]=0
Thot lp
ABAACABA vs
ABACDABA (no)
J=5<n=20 (yes)
7(yes)
k=bmBc[Y[15]]=
bmBc[A]=1
Lp
J=7+1
=8
k=
bmBc[y[16]]=
bmBc[B]=0
J=8+0
=8
k=
bmBc[y[16]]=
bmBc[B]=0
J=8+0
=8
k=
bmBc[y[16]]=
bmBc[B]=0
Thot lp
ABAACABA vs
CDABAACA (no)
J=8<n=20 (yes)
10(yes)
k=bmBc[Y[18]]=
bmBc[B]=0
Khng lp
ABAACABA vs
ABAACABA (yes)
J=10<n=20 (yes)
12(yes)
k=bmBc[Y[20]]=
bmBc[B]=9
Lp
Thot lp
J=12+
9=21
k=
bmBc[y[29]]=
bmBc[B]=0
J=21+
0=21
k=
bmBc[y[29]]=
bmBc[B]=0
J=21+
0=21
k=
bmBc[y[29]]=
bmBc[B]=0
J=21>n=20 (yes)
23(no)
Code:
#include<stdio.h>
#include<conio.h>
#include<string.h>
#include<stdlib.h>
#include<iostream>
using namespace std;
#define ASIZE 256
// Prints a match position, preceded by two newlines and a space.
void OUTPUT(int j){
    cout << "\n\n ";
    cout << j;
}
// Fills the bad-character shift table for the pattern x of length m and
// prints each entry as it is assigned.
//
// Parameters:
//   x     pattern; x[0..m-2] is read (the last character is skipped).
//   m     pattern length.
//   bmBc  output table with ASIZE entries. Characters that do not occur in
//         x[0..m-2] keep the default shift m; every other character gets the
//         distance from its last such occurrence to the end of the pattern.
void preBmBc(char *x, int m, int bmBc[]) {
    int i;
    // Default shift for every character is m.
    for (i = 0; i < ASIZE; ++i)
        bmBc[i] = m;
    cout<<" \n";
    for (i = 0; i < m - 1; ++i){
        // Index through unsigned char: plain char may be signed, which would
        // turn bytes >= 0x80 into negative (out-of-bounds) indices.
        const unsigned char c = static_cast<unsigned char>(x[i]);
        bmBc[c] = m - i - 1;
        cout<<" x[i]: "<<x[i]<<" - bmBc[x[i]]: "<<bmBc[c]<<"\n";
    }
    cout<<"\n ";
}
// Tuned Boyer-Moore: reports, through OUTPUT, every position j at which the
// pattern x[0..m-1] occurs in the text y[0..n-1].
//
// Parameters:
//   x, m  pattern and its length.
//   y, n  text and its length. The buffer must be WRITABLE and hold at least
//         n + m characters: m copies of the last pattern character are
//         written after the text as sentinels.
void TUNEDBM(char *x, int m, char *y, int n) {
    int j, k, shift, bmBc[ASIZE];

    // x[m - 1] below would be out of bounds for an empty pattern.
    if (m <= 0)
        return;

    /* Preprocessing */
    preBmBc(x, m, bmBc);
    // Index the table through unsigned char: plain char may be signed, which
    // would yield a negative index for bytes >= 0x80.
    const unsigned char last = static_cast<unsigned char>(x[m - 1]);
    shift = bmBc[last];
    // A zero entry for the last pattern character lets the skip loop below
    // stop as soon as the window's last character matches.
    bmBc[last] = 0;
    // The sentinels guarantee that the skip loop terminates.
    memset(y + n, x[m - 1], m);

    /* Searching */
    j = 0;
    while (j < n) {
        k = bmBc[static_cast<unsigned char>(y[j + m - 1])];
        while (k != 0) {
            // Three skips per pass (loop unrolled).
            j += k; k = bmBc[static_cast<unsigned char>(y[j + m - 1])];
            j += k; k = bmBc[static_cast<unsigned char>(y[j + m - 1])];
            j += k; k = bmBc[static_cast<unsigned char>(y[j + m - 1])];
        }
        // The window must lie entirely inside the real text. Testing only
        // j < n would accept windows that overlap the sentinel padding and
        // report occurrences that are not in y (e.g. "aa" in "ba").
        if (j <= n - m && memcmp(x, y + j, m - 1) == 0)
            OUTPUT(j);
        j += shift;
        /* shift */
    }
}
// Demo driver: prints the text and pattern with their lengths, runs
// TUNEDBM, then waits for a key press.
int main()
{
    // TUNEDBM writes strlen(pat) sentinel characters directly after the
    // text, so the text must live in a writable buffer with that much spare
    // room. A string literal is read-only and has no spare room. Here the
    // text is 20 characters and the pattern 9; 64 bytes is ample.
    char txt[64] = "ABABDABACDABABCABABE";
    char pat[] = "ABABCABAB";
    cout<<txt<< " "<<strlen(txt);
    cout<<"\n"<<pat <<" "<<strlen(pat);
    cout<<"\n";
    TUNEDBM(pat, static_cast<int>(strlen(pat)), txt, static_cast<int>(strlen(txt)));
    getch();
    return 0;
}
Thut ton
a. c im
b.Thut ton