Professional Documents
Culture Documents
المرجع التعليمي في التنقيب عن البيانات PDF
المرجع التعليمي في التنقيب عن البيانات PDF
كشف التشوهات
K
אאאאא
F EoutliersFא EanomalousF אא
אאאK אE
אאא Edeviation detectF
א ،Eexception miningF אא א ، א
KEoutlierFEanomalyFאאאKא
א ، א א
א K א אEmachine learningF א א
א EanomalousF א א
F א אא א א K א א
،K אא،E
Kאאאאא?א?א
،א א ،א א א א א
א K א،א
،אאאאאK אאא
K
אא،אא
W א א א א א
אא،אא،אאא
Kאא
†^ÃÖ]<Ø’ËÖ] 654
Kאאא
אא :(Fraud Detection) אآﺘﺸﺎف اﻻﺣﺘﻴﺎل
אK אא
K
א א א א א
Kאא
א א :(Intrusion Detection) اﻟﻜﺸﻒ ﻋﻦ اﻟﺘﻄﻔﻞ
K אא א אא
،אאאא،אאא
א،אאאא
אאאאאאא K
K
אאא :(Ecosystem Disturbance) اﺿﻄﺮاب اﻟﻨﻈﺎم اﻟﺒﻴﺌﻲ
אאאאא K א
א אK
אא אא אא
Kאא
א א אא א :(Public Health) اﻟﺼﺤﺔ اﻟﻌﺎﻣﺔ
א،Kאא
،E אF א
אאא
Kאא
אא ، :(Medicine) اﻟﺪواء
K אאא
K א،
אאF
KEאאא
655 l^âçjÖ]<ÌÒ<V10
אאאא א
אEאFאא،אא
א، K אא
אאאאאאEmeanFא
KאאאEאFאK
אK אאאא
K،אאא
1.10 خطوات تمهيدية
א א א
א (2) ،א (1) Kא
אאאאא (3) ،א א
(4) ،Eclass labelF א
Kאא
1.1.10 أسباب الحصول على تشوهات
، W
אא
Kא،אא
E F .ﺑﻴﺎﻧﺎت ﻣﻦ أﺻﻨﺎف ﻣﺨﺘﻠﻔﺔ
אא K
אאא
F אאאאאא K א
א E אאאאאא
א K אא
Kאאאא
EF אאא
KDouglas Hawkinsאא
†^ÃÖ]<Ø’ËÖ] 656
2.1.10 طرق الكشف عن التشوهات
K א א א א
Kא1א،אאא
א .(Model-Based) اﻟﺘﻘﻨﻴﺎت اﻟﺘﻲ ﺗﺴﺘﻨﺪ إﻟﻰ اﻟﻨﻤﻮذج
K א א K
א
657 l^âçjÖ]<ÌÒ<V10
אאאאא
F א K אא EparametersF
،א א אK א א אE
،Eregression modelFאאאאK א
KEpredictedFאאא
، א א א
אא K אאאאא
אאאא
אאא،א KEtraining setF
KE7.5אאFKאאאאאא
אאא،אא
אאאK
Kא،
א .(Proximity-Based) اﻟﺘﻘﻨﻴﺎت اﻟﺘﻲ ﺗﺴﺘﻨﺪ إﻟﻰ اﻟﻘﺮاﺑﺔ
אKאא א א ،א א
אK אאאא
K אאאאאאאא
א אאאאאאא
Kאא،א
אא .(Density-Based) اﻟﺘﻘﻨﻴﺎت اﻟﺘﻲ ﺗﺴﺘﻨﺪ إﻟﻰ اﻟﻜﺜﺎﻓﺔ
א K
אאא،א
K
א،אאא
אאא
א ،
Kא
†^ÃÖ]<Ø’ËÖ] 658
3.1.10 استخدام تسميات الأصناف
،אא،EunsupervisedF אא W
א
F אא א KEsemi-supervisedF אא
KאאEEnormalFEanomalyF
.(Supervised anomaly detection) اﻟﻜﺸﻒ اﻟﻤُﺮاﻗﺐ ﻋﻦ اﻟﺘﺸﻮهﺎت
א אא א
KE אFK
(rare class)אאא
אK א א
K7.5א
.(Unsupervised anomaly detection) اﻟﻜﺸﻒ ﻏﻴﺮ اﻟﻤُﺮاﻗﺐ ﻋﻦ اﻟﺘﺸﻮهﺎت
אK
א א
א א
אEinstanceFE FEscoreF
א א א K
א אא א KElow outlier scoreF
،אא،
Kאא
.(Semi-supervised anomaly detection) اﻟﻜﺸﻒ ﺷﺒﻪ اﻟﻤُﺮاﻗﺐ ﻋﻦ اﻟﺘﺸﻮهﺎت
،ElabeledF א
אאא אK אא
אאאאא
אאאאK אא
א א
א א א Kאא
Kאאאא
659 l^âçjÖ]<ÌÒ<V10
א א א א א א אא
אאאא K אאאא
K7.5אאErare classFאא
4.1.10 مسائل هامة
Kאאאאאא
אאא .ﻋﺪد اﻟﺴﻤﺎت اﻟﻤﺴﺘﺨﺪﻣﺔ ﻟﺘﻌﺮﻳﻒ ﺗﺸﻮﻩ
K אאאא
K א،א،
א
EFאאKאא
K 300א،300
אאאאא
Kא Kא
،אא .اﻟﻤﻨﻈﻮر اﻟﺸﺎﻣﻞ ﻓﻲ ﻣﻘﺎﺑﻞ اﻟﻤﺤﻠﻲ
א K אא
،אא 5 א 6
Kאא
אאא .ًإﻟﻰ أي درﺟﺔ ﺗﻜﻮن ﻧﻘﻄﺔ ﺗﺸﻮهﺎ
אאאא K א W
א K א א א
א א א א K
KEanomaly or outlier scoreF
.ﺗﺤﺪﻳﺪ ﺗﺸﻮﻩ واﺣﺪ ﻓﻲ آﻞ ﻣﺮة أم ﺗﺤﺪﻳﺪ ﻋﺪة ﺗﺸﻮهﺎت دﻓﻌﺔ واﺣﺪة
אאא،אאאאא
Kא K אא
†^ÃÖ]<Ø’ËÖ] 660
،EmaskingFאא
אאK
א
،EswampingFא א
אא،Emodel-basedFאאאK א
Kא א
א،אאאאא .اﻟﺘﻘﻴﻴﻢ
K7.5אאאאאא
،אא אאא
Efalse positive errorF אא EprecisionF א ErecallF א
،אא KEaccuracyF א
אאאאאאאא
Kא
K אאאא.اﻟﻔﻌﺎﻟﻴﺔ
،אאאא
אא K
אאאאא K
א א א א ،א m ،O(m²)
אאא KEproximity matrixF אא
א ،א א ،
Kאאא3אKא
خريطة الطريق
،א W אאאאא
אK אאא،אאא،אאאא
א א א K א
K
661 l^âçjÖ]<ÌÒ<V10
2.10 الطرق الإحصائية
،،אאא
אאאאK א
א K
אאאא
Kא2.10
א א א .( )اﻟﺘﻌﺮﻳﻒ اﻹﺣﺼﺎﺋﻲ ﻟﻠﻜﺎﺋﻦ اﻟﺸﺎذ2.10 اﻟﺘﻌﺮﻳﻒ
Kאא
אK אאא
EmeanF א א ، א א
K א אא א א א אא
Kאאא
א2.10אאאא אאאא
KאEdiscordant observationsFאא،אא
א א אא
אאאאK אאא
Kא،
مسائل هامة
Wאאאאאאא
אאא .ﺗﺤﺪﻳﺪ ﺗﻮزﻳﻊ ﻣﺠﻤﻮﻋﺔ ﺑﻴﺎﻧﺎت
א ،EGaussianF א ،א א
א ، EbinomialF א ،EPoissonF
אאא K
אא K
אאאא،
א K אא E אF
KEheavy-tailed distributionsFאאא
†^ÃÖ]<Ø’ËÖ] 662
1.2.10 الكشف عن الشواذ في توزيعات طبيعية وحيدة المتغير
،אאאא EאF אא
µ א א K אא
א KN(µ,σ) אא،E אאאFσ EאF
KN(0, 1)א1.10
اﻟﻜﺜﺎﻓﺔ اﻻﺣﺘﻤﺎﻟﻴﺔ
1 واﻧﺤﺮاف ﻣﻌﻴﺎري0 ﺗﺎﺑﻊ اﻟﻜﺜﺎﻓﺔ اﻻﺣﺘﻤﺎﻟﻴﺔ ﻟﺘﻮزﻳﻊ ﻏﻮﺻﻲ ﺑﻤﺘﻮﺳﻂ 10.1 اﻟﺸﻜﻞ
663 l^âçjÖ]<ÌÒ<V10
KאEtailFN(0, 1)אEF
K אאא±3אא 0.0027א
|x| ≥ cא ،אxcא
1.10אKα = prob(|x| ≥ c)Kcאא
אא KN(0,1) אא α c א
Kאאאאאא4
0 ﻣﻦ أﺟﻞ ﺗﻮزﻳﻊ ﻏﻮﺻﻲ ﺑﻤﺘﻮﺳﻂ،α = prob(|x| ≥ c) ﺣﻴﺚ،(c, α) أﻣﺜﻠﺔ ﻋﻦ أزواج 1.10 اﻟﺠﺪول
1 واﻧﺤﺮاف ﻣﻌﻴﺎري
|x| ≥ c (1.10)
Kprob(|x| ≥ c) = αאc
EאFאKαאאאא
א α ،א
אא K אא
Kאα،N(0,1)א
†^ÃÖ]<Ø’ËÖ] 664
µ E א א F א א
אאא،EN(µ,σ)Fσאא
KN(0,1) ،z x א EtransformF 3.10
KEEz scoreFz z FKz = (x-µ)/σ א
אאא x אא σµ
Kאאאאאאא Ksx
א א KN(0, 1) z
K7אEGrubbsאF
2.2.10 الكائنات الشاذة في التوزيعات الطبيعية متعددة المتحولات
אאאאא
אK א
K אא
Kאא،אא
א א ،EאF א א EcorrelationF א
א2.10אK EsymmetricalFא א
(0, 0) אא א
WEcovariance matrixF
$$\Sigma = \begin{pmatrix} 1.00 & 0.75 \\ 0.75 & 3.00 \end{pmatrix}$$
KאאS
665 l^âçjÖ]<ÌÒ<V10
اﻟﻜﺜﺎﻓﺔ
اﻻﺣﺘﻤﺎﻟﻴﺔ
3.10 اﻟﻜﺜﺎﻓﺔ اﻻﺣﺘﻤﺎﻟﻴﺔ ﻟﺘﻮزﻳﻊ ﻏﻮﺻﻲ ﺗﻢ اﺳﺘﺨﺪاﻣﻪ ﻟﺘﻮﻟﻴﺪ ﻧﻘﺎط اﻟﺸﻜﻞ 2.10 اﻟﺸﻜﻞ
א א Mahalanobis א
אElogF Mahalanobis K א
K5אאKא
.( )اﻟﻜﺎﺋﻨﺎت اﻟﺸﺎذة ﻓﻲ اﻟﺘﻮزﻳﻌﺎت اﻟﻄﺒﻴﻌﻴﺔ ﻣﺘﻌﺪدة اﻟﻤﺘﺤﻮﻻت1.10 اﻟﻤﺜﺎل
E א FMahalanobis 3.10 א
B(5,5) A(-4,4) א K א
K
א Mahalanobis ،א
K2.10אאאא2000אא
A ، K Mahalanobis B A
א א אE(0,0) x F א
Mahalanobis Mahalanobis B ،א
،24 Mahalanobis 5√2 B א K אא
K35Mahalanobis 4√2 A
†^ÃÖ]<Ø’ËÖ] 666
ﻣﺴﺎﻓﺔ
Mahalanobis
ﺛﻨﺎﺋﻴﺔ اﻷﺑﻌﺎد2002 ﻟﻠﻨﻘﺎط ﻣﻦ ﻣﺮآﺰ ﻣﺠﻤﻮﻋﺔ ﻧﻘﺎط ﻋﺪدهﺎMahalanobis ﻣﺴﺎﻓﺔ 3.10 اﻟﺸﻜﻞ
3.2.10 طريقة النموذج المختلط للكشف عن التشوه
אFאKאאאאא
א א א א ،E2.2.9 א
אאא، Kא
Kאאאא،
א א א א א
،א K EאF א א EmaximizeF
אאאא K א EM א
אאא K
אאאאאK אא
Kאאאאאא
667 l^âçjÖ]<ÌÒ<V10
MW
א Dא
אא K אא A ،EאF א
Wא
M א K א אא 1 0 λ x
At Mt KEuniformF A א ،א
A0M0 = D،t = 0 א Kt א E אF אאא
אא אElogFtאK
WאאD
$$L_t(D) = \prod_{x_i \in D} P_D(x_i) = \left( (1-\lambda)^{|M_t|} \prod_{x_i \in M_t} P_{M_t}(x_i) \right) \left( \lambda^{|A_t|} \prod_{x_i \in A_t} P_{A_t}(x_i) \right) \qquad (4.10)$$
K
א At Mt D אאא PA PM PD t t
4.2.10 أوجه القوة والضعف
אאאא
א K ،
א Kאאאא
669 l^âçjÖ]<ÌÒ<V10
K אא א א אא
אא، אא
Kא
3.10 الكشف عن التشوهات استناداً إلى القرابة
،אאאאאאא
K
אאאK אא
א،אא
Kאא
k אאאא אאא
K 4.10 א KEk-nearest neighborF א
،אאאא،0אEscoreF
KE
درﺟﺔ اﻟﺸﺬوذ
درﺟﺔ اﻟﺸﺬوذ اﺳﺘﻨﺎداً إﻟﻰ اﻟﻤﺴﺎﻓﺔ إﻟﻰ اﻟﺠﺎر اﻷﻗﺮب اﻟﺨﺎﻣﺲ 4.10 اﻟﺸﻜﻞ
†^ÃÖ]<Ø’ËÖ] 670
درﺟﺔ اﻟﺸﺬوذ
ﺗﻜﻮن ﻟﻠﻜﺎﺋﻨﺎت اﻟﺸﺎذة.درﺟﺔ اﻟﺸﺬوذ اﺳﺘﻨﺎداً إﻟﻰ اﻟﻤﺴﺎﻓﺔ إﻟﻰ أول ﺟﺎر أﻗﺮب 5.10 اﻟﺸﻜﻞ
اﻟﻤﺠﺎورة درﺟﺎت ﺷﺬوذ ﻣﻨﺨﻔﻀﺔ
درﺟﺔ اﻟﺸﺬوذ
درﺟﺔ اﻟﺸﺬوذ اﺳﺘﻨﺎداً إﻟﻰ اﻟﻤﺴﺎﻓﺔ إﻟﻰ اﻟﺠﺎر اﻷﻗﺮب اﻟﺨﺎﻣﺲ .ﻳﺼﺒﺢ اﻟﻌﻨﻘﻮد اﻟﺸﻜﻞ 6.10
اﻟﺼﻐﻴﺮ ﺷﺎذاً
درﺟﺔ اﻟﺸﺬوذ
درﺟﺔ اﻟﺸﺬوذ اﺳﺘﻨﺎداً إﻟﻰ اﻟﻤﺴﺎﻓﺔ إﻟﻰ اﻟﺠﺎر اﻷﻗﺮب اﻟﺨﺎﻣﺲ .ﻋﻨﺎﻗﻴﺪ ذات آﺜﺎﻓﺎت اﻟﺸﻜﻞ 7.10
ﻣﺨﺘﻠﻔﺔ
†^ÃÖ]<Ø’ËÖ] 672
1.3.10 أوجه القوة والضعف
אאאאאאאא
KO(m²) אאאא K אא
א ، א א
K א א אא א אא
K א א
א א
Kאא
א K7.10 אאאאא،
D C ،א א،אאא
4.10 אא K אא
، C k = 5
א DאאאKDא
Kאאאא
4.10 الكشف عن الشذوذ استناداً إلى الكثافة
אאאאאאא
Kא
.( )اﻟﻜﺎﺋﻦ اﻟﺸﺎذ اﺳﺘﻨﺎداً إﻟﻰ اﻟﻜﺜﺎﻓﺔ5.10 اﻟﺘﻌﺮﻳﻒ
Kאא
אא אא אא א אא אא א
אאאK אאא
،אא K א k אאא
K6.10אאK،א
673 l^âçjÖ]<ÌÒ<V10
$$\mathrm{density}(x, k) = \left( \frac{\sum_{y \in N(x,k)} \mathrm{distance}(x, y)}{|N(x,k)|} \right)^{-1} \qquad (6.10)$$
1.4.10 الكشف عن الشواذ باستخدام الكثافة النسبية
F א K אאאא
K2.10אאא،ELocal Outlier FactorFLOFאא
K ، אא
EkFאא
אאא K אאאא density(x, k)
K7.10 אאאאאא
אא x אאאאא
Kxאאא
ﺧﻮارزﻣﻴﺔ ﺣﺴﺎب درﺟﺔ ﺷﺬوذ اﺳﺘﻨﺎداً إﻟﻰ اﻟﻜﺜﺎﻓﺔ اﻟﻨﺴﺒﻴﺔ 2.10 اﻟﺨﻮارزﻣﻴﺔ
.( )اﻟﻜﺸﻒ ﻋﻦ اﻟﻜﺎﺋﻨﺎت اﻟﺸﺎذة اﺳﺘﻨﺎداً إﻟﻰ اﻟﻜﺜﺎﻓﺔ اﻟﻨﺴﺒﻴﺔ2.10 اﻟﻤﺜﺎل
אאאאאאאאא
8.10 א Kk = 10 K7.10 א אא א
א،K אא
א C B A א K א
א،אאאאK אא
א א ،EcompactF א א א
Kאא
675 l^âçjÖ]<ÌÒ<V10
LOF
( ﻣﻦ أﺟﻞ اﻟﻨﻘﺎط ﺛﻨﺎﺋﻴﺔ اﻷﺑﻌﺎدLOF) درﺟﺎت اﻟﺸﺬوذ اﺳﺘﻨﺎداً إﻟﻰ اﻟﻜﺜﺎﻓﺔ اﻟﻨﺴﺒﻴﺔ 8.10 اﻟﺸﻜﻞ
7.10 اﻟﻮاردة ﻓﻲ اﻟﺸﻜﻞ
2.4.10 أوجه القوة والضعف
א א א אא אא א
K אא
mFO(m²)אא אאא
O(m log m) אאא، Eא
אאאK אא
kא אLOFאא
א א א א K א א
Kאאא
5.10 تقنيات تستند إلى العنقدة
א،א
א K
א א א א
†^ÃÖ]<Ø’ËÖ] 676
אאא،אא
Kאאא
אאאא
KK-means،אאאאKא
1.5.10 تقييم مدى انتماء كائن إلى عنقود
K אאאא
אא אאא
א Kא Eoutlier scoreF
אאאאא
E א אF א Kא
KMahalanobisאא
אא
K אאאא
אאK אאאאאאא
Kאאאאאאא
אא א א א .( )ﻣﺜﺎل ﻳﺴﺘﻨﺪ إﻟﻰ اﻟﻌﻨﻘﺪة3.10 اﻟﻤﺜﺎل
،K-means אאאא אא K7.10 א
EcentroidF א (1) W א
EאFא،אא(2)،
Emedian distanceFאאאאא
אאאא K אאא
Kאאאא
א אK10.109.10אאא
אK אE אאאאאF
אא א א K
†^ÃÖ]<Ø’ËÖ] 678
אא،אא אKאD
،א
KEDCAFLOFאא
اﻟﺒُﻌﺪ
ﺑُﻌﺪ اﻟﻨﻘﺎط ﻋﻦ أﻗﺮب ﻣﺮآﺰ ﺛﻘﻞ 9.10 اﻟﺸﻜﻞ
اﻟﺒُﻌﺪ
2.5.10 تأثير الشواذ على العنقدة الأولية
אאאאאאאא
W אאאאא K אאא
K א،אאא،א
א ، א א
א א Kאא
K אאאא K א
אא K אא
אא، אאא
אאאאא
K אאאא K אא
א
،Kאאא
א K א EnoiseF
KאEאFאאאא
3.5.10 عدد العناقيد المستخدمة
K אK-means
א،אאאאאא
،10 ،K א
א K א א
א ، א א א א
K
א K
א א ،א
K אא K א א
אא (2)،אא (1)א
K אא،אא
Kאאא
†^ÃÖ]<Ø’ËÖ] 680
4.5.10 أوجه القوة والضعف
ElinearF א EK-means F א
אא אא א א ،א א
،אא K אא
אא K
אאאאא
א א אא
אאא، K אאא
א א א K
א א א
א 98אK אאאאא
אא،א
Kאא
6.10 التمارين
K2.1.10 א אא א א א K1
אאא אא،
א א א א א
KאאאאK
אא K
א אא אא K2
אאא א K א
אאאא، Kא
K א 8.6 א אEhypercliqueF
،א h-confidenceא،
אא KEmaximal hyperclique patternF
Kאאאא
W אאאאאאאאא .3
K א،אאאא،אא (model-based)אאא
681 l^âçjÖ]<ÌÒ<V10
א א א K
Kאא،
אאא E3.10 אאאFGrubbs א K4
אאאK3.10אאא
אאאEz-scoreFzא
א z אאא K אא
EcriticalF אא،gc z
אא Kα Esignificance levelF
אאא Kא
Kאgc
Grubbs אא $\frac{m-1}{\sqrt{m}}\sqrt{\frac{t_c^2}{m-2+t_c^2}}$ אא (a)
K0.05א؟אm
KאאEF א(b)
EאאF א x א א K5
WאאΣµ
$$\mathrm{prob}(x) = \frac{1}{(\sqrt{2\pi})^{m}\,|\Sigma|^{1/2}}\; e^{-\frac{(x-\mu)\,\Sigma^{-1}\,(x-\mu)^{T}}{2}} \qquad (8.10)$$
( 2π )m ∑
µ S א א x א א א
log prob(x) ،E א FΣ א א
Kx x אxMahalanobis
אאא E אאFK-means א K6
KE10.10אFאאא5.10א
10.10 אאאאא (a)
א؟Kאאאא
K
10 ،א א (b)
א א א א א א
؟،؟א
K א א אEאF א אא (c)
Kאאא
א0.01אא K7
Efalse alarm rateFאאא،0.99
KEאאאאF؟א99%אEdetection rateFא
683 l^âçjÖ]<ÌÒ<V10
א ،א K8
א K א،א
،אאא
אאאאאK
א
؟אא
א K[0, 1] אא K9
אאאא
؟א
אא K10
אאאKא
Kא
א א א א א (a)
א א ، אFKא
KEא
אאאאא (b)
؟אא
†^ÃÖ]<Ø’ËÖ] 684