Professional Documents
Culture Documents
Асимметричные меры подобия текстов
Асимметричные меры подобия текстов
:
, 2012, . 247–253.
007:159.955:004.838.3:519.816
.., ..., ., . . . , .
Leonid.Leonenko@gmail.com
, ()
. ,
, () ()
.
(
) Computer Science.
(., ., [1], [2], [3]). [4]
("")
. ,
(, , .)
[5].
S a b ,
:
S(a,b)=S(b,a). , ,
. , " "
" " ,
().
[4], ""
"" (
"") . , (,
) ""
. .
, "",
(., ., [6], [7], [8]).
"" , [9]
-,
,
( / ).
,
"" ,
.
""
,
(). .
[4], [9]; , , [5]. (
). , "", ""
"" : "" "
" (.. ), ...
a X
,
.
.
.
[4] D k (a,b) a b
a b. k
0 ( ) 1 ( )
.
a, b a .
W k (a ,b)
, a b. W k (a,b)=M>0,
a
b: $\{A_{i_1}, \dots, A_{i_N}\}$ $\{B_{j_1}, \dots, B_{j_N}\}$ , $A_{i_L} = B_{j_L}$ $(L = 1, \dots, N)$,
$\{A_{i_1}, \dots, A_{i_N}\}$ $M$.
W k (a,b) a b, N
.
W k (a,b) a b
, N
{A iL } {B jL } a b
, a .
:
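$$\begin{array}{lcccc}
      & X   & Y   & \dots & Z   \\
(a)   & i_1 & i_2 & \dots & i_N \\
(b)   & j_1 & j_2 & \dots & j_N
\end{array}$$
$X = A_{i_1} = B_{j_1},\; Y = A_{i_2} = B_{j_2},\; \dots,\; Z = A_{i_N} = B_{j_N}$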
, a b. , ,
$i_1 < i_2 < \dots < i_N$.
W k (a,b)
B 0 (a,b).
,
a 1.
, .
Dw(u) u,
u). B k (a,b)
W k (a,b). , , L . :
$Dw_k(b:a, L) = W_k(a,b) + L \cdot \{\, D(b) - D(B_k(a,b)) \,\}$
Dw k (b:a,L) "L- a b".
B k (a,b) ( W k (a,b))
b, "quasi-" L.
a b ,
Dw(a)=0 , Dw(a)>0 :
$G_k(a,b,L) = W_k(a,b) \,/\, \max\{\, Dw(a),\; Dw_k(b:a, L) \,\}$ $(k = 0, 1)$.
. G k (a,b,L)
:
(1) a 0, b L
G k (a,b,L)=0.
L>0
G 0 (a,b,L)=1
, b
B 0 (a,b),
a B 0 (a,b)
, 0.
(5) L>0 G 1 (a,b,L)=1 , b a
a ,
a .
(6) $G_1(a,b,L) \le G_0(a,b,L)$ a, b L.
, a b
G k (a,b,L), "quasi-" L. , , L=0,
b, B k (a,b). , ,
"" a b. , , L
a (L>> Dw(a)), b
G k (a,b,L)
, .
.
L
, , ""
Lm a. ,
Lm (, ,
), " ",
, .
1.
a = ,
b 1 = ,
b2
. a: 2, 2, 2, 3, 3, 3, 1, 1, 1, 1, 0, 0,
: $G_1(a, b_1, L_m) = 0.771$
$G_1(a, b_2, L_m) = 0.474$;
3. a = , : 2, 2, 2, 2, 2, 1, 2, 0; b = . c = .
b a, $G_1(a, b, L_m) = 0.462$,
$G_1(a, c, L_m) = 0.923$ $G_1(b, c, L_m) = 0.571$. "quasi-"
u v : $\rho(u, v) = 1 - G_1(u, v, L_m)$. 3
, . ,
$\rho(a, b) = 0.538$, $\rho(a, c) = 0.077$, $\rho(b, c) = 0.429$. , $\rho(a, b) > \rho(a, c) + \rho(b, c)$, ..
"quasi-" .
4. a = PROPORTION ,
a. PROPOR a
2, TION 1. " " L
G 1 (a, b, L) 2 b,
. b ,
G 1 (a, b, 2) "" F 1 (a,b) a b:
PORTRAIT (0.5, 0.5);
POSITION (0.5, 0.6);
REPETITION (0.5, 0.6);
PORTION (0.625, 0.7);
POPORSION (0.813, 0.8);
PROPOTION (0.875, 0.9);
, " ", , b
" a", 0.7. ,
F 1 (a,b), PROTECTION, PORTION
PREPARATION PROPORTION.
G 1 (a,b,L), , 4-
, "" a.
0.81. .
G k (a,b,L)
n 1 n 2 ( , ,
) n
.
G k (a,b,L)
W 1 (a,b).
heaviest common
subsequence (hcs) [1]. ,
$W_1(a,b)$ $O(n^2)$; $O(n \log n)$ [6].
"" G k (a,b,L)
. , , [10],
W 1 (a,b) ,
"" a b,
( "" ). ,
, ,
20-30,
.
G k (a,b,L)
.
, , : "
?".
: " 5 6 1648 ".
(. ;
b 1 ( 4). , ""
"" ,
( CONTROL).
b 2 = 1648 (0.882)
b 2 , , , b 1 .
() F 1 (a,b)
"".
b 3 =
1648 (0.570)
b 3 b 2 , "" ,
.
"",
CONTROL : b 1 , b 2
, b 3 .
. ""
,
, [4],
,
. ""
(),
.
"".
() ,
/ (., ., [11], [12]).
,
, () ,
.
1. Stephen G.A. String Search. Technical Report TR-92-gas-01. University College of North Wales, 1992. 108 p.;
wwwcdf.pd.infn.it/localdoc/string_search.ps.gz
2. Navarro G. A Guided Tour to Approximate String Matching // ACM Computing
Surveys. 2001. Vol. 33. No. 1. P. 31–88.
3. .M.
// 6- . .
. RCDL2004. , , 2004; http://rcdl.ru/doc/2004/paper27.pdf .
4.
..,
..
//
. 1996. 8. . 119–131.
5. . .
// . VII . IAI-2007.
.: , 2007, . 210–220.
6. Jacobson G., Vo K-P. Heaviest Increasing/Common subsequence Problems //
Combinatorial Pattern Matching. Third Annual Symposium (CPM 92).
Proceedings. 1992. P. 52–66.
7. Ma Y., Wang C.A. A New Model for Global Multiple Alignment of Whole Genome
Sequences // International Journal of Information Technology. 2005. Vol. 11.
No. 8. P. 67–74.
8. Amir A., Gotthilf Z., Shalom B.R. Weighted LCS (Extended Abstract) // Lecture
Notes in Computer Science. 2009. Vol. 5874/2009. P. 36–47.
9. Leonenko L. Analogical inferences in computer assisted knowledge testing systems
// 6-th Multi-Conference on Systemics, Cybernetics and Informatics. Proceedings.
2002. XVIII. P. 371–376.
10. . . :
//
, , . . 4. . 3
..: , 2004. . 6–12.
11. .., .. // . .
. . 1987. 5. . 42–63.
12. Hummel J., Holyoak K. Distributed Representations of Structure: A Theory of
Analogical Access and Mapping // Psychological Review. 1997. Vol. 104.
No. 3. P. 427–466.