You are on page 1 of 11
016/21, 11:40 AM Importing our dataset import pandas as pd model pric year 2016 Tas 16000 2017 Gras 15995 2015 Gras 13998 2017 Gras 18998 2017 Gres 17498 toyota. shape out (104 model price year transmission Manual Manual Manual Manual Manual irop ( ['transmi. DECISION TREE for TOYOTA dataset - Jupyler Notebook mileage fuelType 24089 19615 27489 14736 36284 mileage mpg engineSize 2016 Tas 16000 2017 Gras 15995 2015 Tas 13998 2017 Gras 18998 2017 Gras 17498 24088 36.2 18615 36.2 27468 36.2 14736 36.2 36284 36.2 Cleaning dataset 20 20 20 20 20 localhost 8888 /notebooks/DECISION TREE for TOYOTA dataset.ipynb tax Petrol 265 Petrol 145 Petrol 265 Petrol 150 Petrol 145 , ‘fueltype 36.2 36.2 36.2 36.2 36.2 20 20 20 20 20 wm 1016/21, 1:40 AM DECISION TREE for TOYOTA dataset - Jupyler Notebook Baon [106]: # number of na values toyota. isna() .sum() out (106): model price mileage mpg engineSize dtype: inted Baon 1 # number of null values toyota. isnull() .sum( out (107) : model price mileage mpg enginesize dtype: inted # group by model toyota.groupby ("model") ["model'] .agg(' count") .sort_values (ascending=False: out (108): model Yaris 2122 Aygo 1961 Auris m2 c-HR 419 RAV4 473 Corolla 267 Prius 232 Avensis 118 verso 114 Hilux 86 cre 73 Land Cruiser 51 PROACE VERSO 15 Supra 12 camry ll 19 8 Urban Cruiser 4 Verso-S 3 Name: model, dtype: intéd localhost 8888 /notebooks/DECISION TREE for TOYOTA dataset.ipynb amt 016/21, 11:40 AM Bi 09): Len (toyota. 
groupby ("model") out (10 18 features engineering Bsou [110]: # OneHotBncoder dummies = pa.g dummies head (} DECISION TREE for TOYOTA dataset - Jupyler Notebook tmodel' J .agg ("count ')) et_dummies (toyota.model) out (12 Auis Avensis Aygo jC camry Corolla GT86 Hilux Ia g,t8Md PROACE pug year me 0 0 0 0 0 0 + 09 Oo oO wy 0 60 00 Ot 0 0 mis 0 0 0 0 0 o + 09 0 o 0 m7 0 0 0 0 0 0 + 00 0 0 0 m7 0 809 0 0 Ol 0 0 toyota _new=pd.coneat ([dunmies, toyota], ax toyota _new-head() out (112 ‘Auris Avensis Aygo gz Camry Corolla GT86 Hilux IQ gran’ .. Supra viper year me oo o 0 0 0 0 7 00 Oo ° ¢ m7 0 09 0 0 0 o + 09 0 0 ms 0 0 0 0 0 o + 00 0 0 ¢ m7 0 09 0 0 0 0 00 0 oC m7 0 0 0 0 0 0 + 00 0 0 5 rows * 23 columns localhost 8888 /notebooks/DECISION TREE for TOYOTA dataset.ipynb ant 1016/21, 1:40 AM DECISION TREE for TOYOTA dataset - Jupyler Notebook Bron [213]: toyota new=toyota_new.drop(('Auris'], axis=1) toyota_new-head() KeyError Traceback (most recent call last) in => 1 toyota new=toyota new.drop(['Auris'], axis=1) 2 toyota _new.head() ~\anaconda3\1ib\site-packages\pandas\core\frame.py in drop(self, label 8, axis, index, columns, level, inplace, errors) 4306 weight 1.0 0.8 xeturn super () .drop( labels=labels, axis=axis, ~\anaconda3\1ib\site-packages\pandas\core\generic.py in drop(self, lab els, axis, index, columns, level, inplace, errors) for axis, labels in axes. 
items(): if labels is not None: obj = obj._drop_axis(labels, axis, level 4155 if inplace: ~\anaconda3\lib\site-packages\pandas\core\generic.py in _drop_axis(self, labels, axis, level, errors) new_axis = axis.drop(labels, level=level, erro else: new_axis = axis.drop(labels, errors=errors) result = self.reindex(**{axis_name: new_axis}) ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in drop(self, labels, errors) if mask.any(): if errors != "ignore": raise KeyError(f"{labels[mask]} not found in a indexer = indexer[~mask] return self.delete(indexer) KeyError: "['Auris'] not found in axis" localhost:8888/notebooks/DECISION TREE for TOYOTA dataset.ipynb 4/11 10/16/21, 11:40 AM DECISION TREE for TOYOTA dataset - Jupyter Notebook In [81]: model_new=code.fit_transform(toyota['model']) ValueError Traceback (most recent call last) in ----> 1 model_new=code.fit_transform(toyota['model']) ~\anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y) 442 self._validate_keywords() --> 443 return super().fit_transform(X, y) 444 445 def transform(self, X): ~\anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params) 687 if y is None: # fit method of arity 1 (unsupervised transformati return self.fit(X, **fit_params).transform(X) else: # fit method of arity 2 (supervised transformatio ~\anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y) 416 self._validate_keywords() --> 417 self._fit(X, handle_unknown=self.handle_unknown, 418 force_all_finite='allow-nan') 419 self.drop_idx_ = self._compute_drop_idx() ~\anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown, force_all_finite) def _fit(self, X, handle_unknown='error', force_all_finite X_list, n_samples, n_features = self._check_X( X, force_all_finite=force_all_finite) ~\anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in _c
heck_X(self, X, force_all_finite) 42 if not (hasattr(X, 'iloc') and getattr(X, 'ndim', 0) == 2): 43 # if not a dataframe, do normal check_array validation ---> 44 X_temp = check_array(X, dtype=None, 45 force_all_finite=force_all_finite) 46 if (not hasattr(X, 'dtype') ~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs) 61 extra_args = len(args) - len(all_args) 62 if extra_args <= 0: localhost:8888/notebooks/DECISION TREE for TOYOTA dataset.ipynb 5/11 10/16/21, 11:40 AM DECISION TREE for TOYOTA dataset - Jupyter Notebook ---> 63 return f(*args, **kwargs) 64 65 # extra_args > 0 ~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator) 635 # If input is 1D raise error 636 if array.ndim == 1: --> 637 raise ValueError( 638 "Expected 2D array, got 1D array instead:\narray={}.\n" 639 "Reshape your data either using array.reshape(-1, 1) if " ValueError: Expected 2D array, got 1D array instead: array=[' GT86' ' GT86' ' GT86' ... ' Urban Cruiser' ' Urban Cruiser' ' Urban Cruiser']. Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
In [24]: X=toyota[['price', 'transmission', 'engineSize']] X.shape Out[24]: (6738, 3) In [25]: X.head() Out[25]: price transmission engineSize year 2016 16000 Manual 20 2017 15995 Manual 20 2015 13998 Manual 20 2017 18998 Manual 20 2017 17498 Manual 20 localhost:8888/notebooks/DECISION TREE for TOYOTA dataset.ipynb 6/11 10/16/21, 11:40 AM DECISION TREE for TOYOTA dataset - Jupyter Notebook In [26]: y=toyota[['model']] y.head() Out[26]: model year 2016 GT86 2017 GT86 2015 GT86 2017 GT86 2017 GT86 transforming some columns of our dataset into numericals localhost:8888/notebooks/DECISION TREE for TOYOTA dataset.ipynb 7/11 10/16/21, 11:40 AM DECISION TREE for TOYOTA dataset - Jupyter Notebook In [38]: y['model_num']=LE_model.fit_transform(y['model']) X['transmission_num']=LE_model.fit_transform(X['transmission']) X['engineSize_num']=LE_model.fit_transform(X['engineSize']) :1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy (https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy) y['model_num']=LE_model.fit_transform(y['model']) :2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy (https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy) X['transmission_num']=LE_model.fit_transform(X['transmission']) :3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer) = value instead See the caveats in the documentation: nttps://pandas.pydata.org/pandas -docs/stable/user_guide/indexing.htmlfreturning-a-view-versus-a-copy (https: //pandas .pydata.org/pandas-docs/stable/user_guide/indexing.ntm lreturning-a-view-versus-a-copy) X[‘engineSize_num']=LE_model. fit_transform(x['engineSize']) localhost 8888 /notebooks/DECISION TREE for TOYOTA dataset.ipynb ant 016/21, 11:40 AM x 39 out (39. year 2016 2017 2015 2017 2017 zon 2011 2012 2011 2011 DECISION TREE for TOYOTA dataset -Jupyler Notebook price transmission engineSize transmission_num engineSize_num +6000 15995 13998 18998 17498 5500 4985 4995 3095 4495 Manual Manual Manual Manual Manual Automatic Manual Manual Manual Manual 6738 rows x 5 columns X_ne X new out (40 year 2016 2017 2015 2017 2017 2011 zon 2012 zon 211 XI ['pr price transmission_num 16000 15995 13998 18998 17498 5500 4985 4995 3995 4495 6738 rows x 3 columns 20 1 8 20 1 8 20 1 a 20 1 a 20 1 8 10 o 1 13 1 3 14 1 4 13 1 3 13 1 3 ytengineSize_num']] engineSize_num localhost 8888 /notebooks/DECISION TREE for TOYOTA dataset.ipynb ont 016/21, 11:40 AM Baox y_newsy | ['model ynew out (41 model_num year 2016 6 2017 6 2015 6 2017 6 2017 6 2011 8 2011 14 2012 14 2011 14 zon 14 pum") J 6738 rows x 1 columns DECISION TREE for TOYOTA dataset - Jupyler Notebook getting started with Decision tree classifier Bson [58]: from sklearn. Bao 1 model_tree. fit (X_new,y out DecisionTreeClassif DecisionTreeClassifier (max_dept! onTreeClassi: x (criterion ‘entropy’, max_depth=30) localhost 8888 /notebooks/DECISION TREE for TOYOTA dataset.ipynb criterion='entropy") tom 016/21, 11:40 AM DECISION TREE for TOYOTA dataset - Jupyler Notebook Baom [ Jt localhost 8888 /notebooks/DECISION TREE for TOYOTA dataset.ipynb wnt

You might also like