Wine_Quality_Prediction - Jupyter Notebook

In [55]: import pandas as pd
         import numpy as np
         import matplotlib.pyplot as plt
         import seaborn as sns
         from sklearn.model_selection import train_test_split
         from sklearn.tree import DecisionTreeClassifier
         from sklearn.ensemble import RandomForestClassifier
         from sklearn.svm import SVC
         from sklearn import svm
         from sklearn.metrics import confusion_matrix, classification_report
         from sklearn.neighbors import KNeighborsClassifier
         from sklearn.naive_bayes import GaussianNB
         from sklearn.preprocessing import StandardScaler, LabelEncoder
         from sklearn.metrics import accuracy_score
         from sklearn.ensemble import VotingClassifier

In [3]: wine = pd.read_csv('C:\\Users\\LENOVO\\Documents\\wine.csv')

In [4]: # view the first 10 rows of our dataset
        wine.head(10)

Out[4]: [first 10 rows of the dataset, 12 columns: fixed acidity, volatile acidity,
        citric acid, residual sugar, chlorides, free sulfur dioxide,
        total sulfur dioxide, density, pH, sulphates, alcohol, quality]

In [5]: # the last 5 rows of our dataset
        wine.tail()

Out[5]: [rows 1594-1598 of the dataset, same 12 columns]

In [6]: wine.shape

Out[6]: (1599, 12)

In [7]: # statistical description of the dataset
        wine.describe()

Out[7]: [summary statistics (count, mean, std, min, quartiles, max) for each
        numeric column; count is 1599.000000 for every column]

In [8]: wine.isnull()

Out[8]: [boolean DataFrame, 1599 rows x 12 columns; every entry is False,
        i.e. the dataset has no missing values]
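Note: the full boolean frame above is hard to scan. A per-column count is a more
compact check for the same thing; a minimal sketch, assuming the same `wine`
DataFrame loaded above:

    # count missing values per column -- every count should be 0 for this dataset
    wine.isnull().sum()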
In [9]: wine.columns

Out[9]: Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
               'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
               'pH', 'sulphates', 'alcohol', 'quality'],
              dtype='object')

In [10]: # number of distinct values per column (the original cell used
         # wine.nunique without parentheses, which only prints the bound method)
         wine.nunique()

Out[10]: [number of unique values in each of the 12 columns]

In [11]: wine['quality'].value_counts()

Out[11]: 5    681
         6    638
         7    199
         4     53
         8     18
         3     10
         Name: quality, dtype: int64

In [12]: plt.figure(figsize=(10,6))
         sns.scatterplot(x=wine['fixed acidity'], y=wine['volatile acidity']);

[scatter plot: fixed acidity (x-axis) against volatile acidity (y-axis)]

In [14]: ## Preprocessing the dataset: bin quality into two classes
         bins = (2, 6.5, 8)
         group_name = ['good', 'bad']
         wine['quality'] = pd.cut(wine['quality'], bins=bins, labels=group_name)

In [15]: wine['quality'].unique()

Out[15]: ['good', 'bad']
         Categories (2, object): ['good' < 'bad']

In [19]: wine[:10]

Out[19]: [first 10 rows; the quality column now holds the labels 'good'/'bad']

In [16]: plt.figure(figsize=(10,6))
         sns.countplot(wine['quality']);

C:\Users\LENOVO\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
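Note: the FutureWarning above comes from passing the column positionally. Naming
the argument, as seaborn 0.12 will require, silences it; a minimal sketch:

    # equivalent count plot with the column passed as an explicit keyword
    sns.countplot(x='quality', data=wine)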
  warnings.warn(

[bar chart of the counts of the two quality classes]

In [26]: label_quality = LabelEncoder()

In [28]: wine['quality'] = label_quality.fit_transform(wine['quality'])

In [29]: wine[:10]

Out[29]: [first 10 rows; quality is now encoded as 0 ('bad') or 1 ('good')]

In [31]: wine['quality'].value_counts()

Out[31]: 1    1382
         0     217
         Name: quality, dtype: int64

In [32]: x = wine.drop(['quality'], axis=1)

In [33]: x

Out[33]: [the 11 feature columns, 1599 rows x 11 columns]

In [36]: y = wine['quality']
         y

Out[36]: [the encoded quality labels]
         Name: quality, Length: 1599, dtype: int32

In [37]: # training, testing and splitting the dataset
         x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=4)

In [40]: # applying standard scaler to get optimized results
         sc = StandardScaler()
         x_train = sc.fit_transform(x_train)
         x_test = sc.transform(x_test)

In [41]: x_train

Out[41]: array([[-1.09852129,  1.50992721, -1.38824517, ...,  1.14517889,
                 -0.8026406 ,  1.10997762],
                [-1.09852129,  0.6327804 , -0.30279894, ...,  1.79318041,
                  0.02216045, -0.59560931],
                [ 1.33427414,  1.70799262,  0.52420772, ..., -0.99325583,
                 -0.8026406 , -0.50085448],
                ...,
                [-0.75097909,  0.18005946, -0.71630226, ...,  0.17316311,
                 -0.39024008, -1.16413829],
                [ 0.29164752, -0.046301  ,  0.36914398, ..., -0.34524531,
                 -0.27241136, -0.8798738 ],
                [ 1.21842673, -1.51764404,  1.0927748 , ...,  0.10836206,
                  1.25936202,  0.44669381]])
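Note (not in the original notebook): because the scaler is fit on x_train only
and then applied to x_test, there is no train/test leakage here. A Pipeline
makes that discipline automatic; a minimal sketch, assuming unscaled splits
x_train_raw and x_test_raw (hypothetical names, since x_train above is already
scaled):

    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.neighbors import KNeighborsClassifier

    # the scaler is refit on whatever data the pipeline is trained on
    pipe = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
    pipe.fit(x_train_raw, y_train)
    print(pipe.score(x_test_raw, y_test) * 100)   # accuracy on the held-out set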
In [42]: # USING DECISION TREE CLASSIFIER ALGORITHM
         dtc = DecisionTreeClassifier(random_state=45)

In [43]: dtc.fit(x_train, y_train)

Out[43]: DecisionTreeClassifier(random_state=45)

In [44]: dtc_pred = dtc.predict(x_test)

In [48]: dtc_pred[:30]

Out[48]: [array of 0/1 predictions for the first 30 test samples]

In [58]: # to determine the accuracy of the model
         accuracy_score(y_test, dtc_pred)*100

Out[58]: 86.0

In [50]: # USING RANDOM FOREST CLASSIFIER ALGORITHM
         rfc = RandomForestClassifier(n_estimators=200)

In [51]: rfc.fit(x_train, y_train)

Out[51]: RandomForestClassifier(n_estimators=200)

In [52]: rfc_pred = rfc.predict(x_test)

In [53]: rfc_pred[:30]

Out[53]: [array of 0/1 predictions for the first 30 test samples]

In [57]: # to determine the accuracy of the model
         accuracy_score(y_test, rfc_pred)*100

Out[57]: 89.5

In [59]: # USING K-NEAREST NEIGHBOR ALGORITHM
         knn = KNeighborsClassifier(n_neighbors=5)

In [60]: knn.fit(x_train, y_train)

Out[60]: KNeighborsClassifier()

In [61]: knn_pred = knn.predict(x_test)

In [63]: knn_pred[:30]

Out[63]: array([1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
                1, 1, 1, 1, 1, 0, 1, 1])

In [64]: # to determine the accuracy of the model
         # (note: this cell scores rfc_pred again; for the KNN model it should
         # be accuracy_score(y_test, knn_pred)*100)
         accuracy_score(y_test, rfc_pred)*100

Out[64]: 89.5

In [66]: nb = GaussianNB()

In [67]: nb.fit(x_train, y_train)

Out[67]: GaussianNB()

In [68]: nb_pred = nb.predict(x_test)

In [70]: nb_pred[:30]

Out[70]: [array of 0/1 predictions for the first 30 test samples]

In [71]: # to determine the accuracy of the model
         accuracy_score(y_test, nb_pred)*100

Out[71]: 82.0
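Note (not in the original notebook): confusion_matrix and classification_report
are imported at the top but never used. A short loop over the four fitted base
models puts per-class precision and recall next to the raw accuracies before
moving on to the ensembles; a minimal sketch:

    # compare the fitted models on the same held-out test split
    for name, model in [('Decision', dtc), ('Random', rfc),
                        ('KNeighbors', knn), ('Gaussian', nb)]:
        pred = model.predict(x_test)
        print(name, accuracy_score(y_test, pred) * 100)
        print(confusion_matrix(y_test, pred))
        print(classification_report(y_test, pred))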
In [74]: # USING VOTING ENSEMBLE TO DECIDE THE BEST MODEL
         estimators = [('Decision', dtc), ('Random', rfc), ('KNeighbors', knn), ('Gaussian', nb)]

In [75]: VC = VotingClassifier(estimators=estimators, voting='hard')

In [76]: VC.fit(x_train, y_train)

Out[76]: VotingClassifier(estimators=[('Decision',
                                       DecisionTreeClassifier(random_state=45)),
                                      ('Random',
                                       RandomForestClassifier(n_estimators=200)),
                                      ('KNeighbors', KNeighborsClassifier()),
                                      ('Gaussian', GaussianNB())])

In [77]: vc_pred = VC.predict(x_test)

In [78]: vc_pred[:30]

Out[78]: [array of 0/1 predictions for the first 30 test samples]

In [79]: # to determine the accuracy of the model
         accuracy_score(y_test, vc_pred)*100

Out[79]: 88.0

In [80]: # USING BAGGING ENSEMBLE
         from sklearn.ensemble import BaggingClassifier

In [81]: BC = BaggingClassifier(base_estimator=dtc, n_estimators=10)

In [82]: BC.fit(x_train, y_train)

Out[82]: BaggingClassifier(base_estimator=DecisionTreeClassifier(random_state=45))

In [83]: bc_pred = BC.predict(x_test)

In [84]: bc_pred[:30]

Out[84]: [array of 0/1 predictions for the first 30 test samples]

In [85]: # to determine the accuracy of the model
         accuracy_score(y_test, bc_pred)*100

Out[85]: 87.5

In [86]: # USING STACKING ENSEMBLE
         from sklearn.ensemble import StackingClassifier

In [87]: SR = StackingClassifier(estimators=estimators, final_estimator=dtc)

In [88]: SR.fit(x_train, y_train)

Out[88]: StackingClassifier(estimators=[('Decision',
                                         DecisionTreeClassifier(random_state=45)),
                                        ('Random',
                                         RandomForestClassifier(n_estimators=200)),
                                        ('KNeighbors', KNeighborsClassifier()),
                                        ('Gaussian', GaussianNB())],
                            final_estimator=DecisionTreeClassifier(random_state=45))

In [89]: sr_pred = SR.predict(x_test)

In [90]: sr_pred[:30]

Out[90]: [array of 0/1 predictions for the first 30 test samples]

In [91]: # to determine the accuracy of the model
         accuracy_score(y_test, sr_pred)*100

Out[91]: 87.5

In [92]: # USING VOTING ENSEMBLE TO DECIDE THE BEST MODEL
         from sklearn.ensemble import VotingClassifier

In [93]: estimators = [('Voting', VC), ('Bagging', BC), ('Stacking', SR)]

In [94]: VC = VotingClassifier(estimators=estimators, voting='hard')

In [95]: VC.fit(x_train, y_train)

Out[95]: VotingClassifier(estimators=[('Voting',
                                       VotingClassifier(estimators=[('Decision', DecisionTreeClassifier(random_state=45)),
                                                                    ('Random', RandomForestClassifier(n_estimators=200)),
                                                                    ('KNeighbors', KNeighborsClassifier()),
                                                                    ('Gaussian', GaussianNB())])),
                                      ('Bagging',
                                       BaggingClassifier(base_estimator=DecisionTreeClassifier(random_state=45))),
                                      ('Stacking',
                                       StackingClassifier(estimators=[('Decision', DecisionTreeClassifier(random_state=45)),
                                                                      ('Random', RandomForestClassifier(n_estimators=200)),
                                                                      ('KNeighbors', KNeighborsClassifier()),
                                                                      ('Gaussian', GaussianNB())],
                                                          final_estimator=DecisionTreeClassifier(random_state=45)))])

In [96]: final_pred = VC.predict(x_test)

In [97]: final_pred[:30]

Out[97]: [array of 0/1 predictions for the first 30 test samples]

In [98]: # to determine the accuracy of the model
         accuracy_score(y_test, final_pred)*100

Out[98]: 88.75

In [100]: # pickle to save model
          import pickle

In [101]: with open('wine.plk', 'wb') as f:
              pickle.dump(wine, f, protocol=pickle.HIGHEST_PROTOCOL)
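Note (not in the original notebook): the cell above pickles the wine DataFrame,
not a fitted model, even though its comment says "save model". If the intent is
to reuse the trained ensemble later, dump the estimator instead (and the scaler,
so new samples can be preprocessed identically); a minimal sketch with a
hypothetical filename and a hypothetical unscaled feature array new_rows:

    import pickle

    # persist the fitted scaler and the final voting ensemble together
    with open('wine_model.pkl', 'wb') as f:
        pickle.dump({'scaler': sc, 'model': VC}, f,
                    protocol=pickle.HIGHEST_PROTOCOL)

    # later: reload and predict on new, unscaled feature rows
    with open('wine_model.pkl', 'rb') as f:
        saved = pickle.load(f)
    preds = saved['model'].predict(saved['scaler'].transform(new_rows))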
