You are on page 1of 13
9122122, 752 PM Logreg Name: Shruti Suman ROLL: MCA/10011/21 #logistic Regression (1) import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.preprocessing import Standardscaler from sklearn.model_selection import train_test_split from sklearn import preprocessing from sklearn.linear_madel import LogisticRegression plt.re("font", size=7) data=pd.read_csv("foodPrices.csv") data rt date admint admin2__—market latitude longitude category commodity unit 15 0 o1- Deh Delhi Delh 28666667 7.216667 “ereals anc Rice KG 1908 tubers 6 cereal 1 ol Delh Delhi el aasnens7 7716667 SEAN hay KC sea tubers i miscellaneous 2 01 Deh Delhi Delh 28666667 77.216667 wus Sugar KG 1994 e oll 3 01. Deh Delhi Delh 28666667 7.216667 oiland fats. stay KE (mustard 1994 & cereals and 4 O1- Gujarat Ahmadabad Ahmedabad 23033333 72616667 “92 Rice KG ers 1994 15 West West ulses anc Lentils rraotz o7- West Mest Gharagpur 22330330 7.226720 PUES ents Ke sels Bengal Vidnapore aut (masur ae West West uses and Lentils 12013 o7- ee idnapore “haragpur 2.339330 87.226720 PUSS Lentils gg apts Bengal Vidnapore auts (moon 15 West West pulses and Lentils 12014 07- Sharagpur 22.339330 87.226720 tents KG soup Bengal Vidnapore nuts furad localhost 8889inbconverthimll.ogReg ipynb?download=tas ana 9122122, 752 PM LogReg date adi admin2 market latitude longitude category commodity unit Ae West West vegetables yrois 07. Nes St charagpur 22339330 87226720 °° Onions «6 aoss Bengal Midnapore and fruits Ae west West vegetables 172016 07- haragpur 22339330 7.226720 Tomatoes KG aop Bengal Midnapore and fruits 171228 rows x 14 columns rT » ; for col in data.colunns: print( 158, dtype: intea 165, dtype: intea ana 9122122, 752 PM Logreg milk and dairy eae Name: category, dtype: inted sns.countplot(x='category', datasdata, palett: plt.show plt.savefig( ‘count_plot") ls*) datal ‘commodity ].value_counts() ejoy, Rice 12436 Sugar 11836 heat 11263 041 (mustard) 10821 Potatoes 8804 Onions 8754 heat flour 8235 Salt (iodised) 8113 Tomatoes 8106 Milk (pasteurized) 8055 Lentils (masur) 7910 Lentils 7648 Tea (black) 7541 011 (sunflower) 7153 Lentils (moong) 6548 Lentils (urad) 6525 Sugar (jaggery/gur) 6394 041 (groundnut ) 6373 Shee (vanaspati) 6248 oil (palm) 6024 041 (soybean) 5248 Chickpeas 998 Milk 991 Name: commodity, dtype: intea sns.countplot(x=' commodity’, data-data, palette='his') plt.show In datal ‘pricetype" ].value_counts() Retail 169748 wholesale 277 Name: pricetype, dtype: int6a 7 sns.countplot(x='pricetype’ ,data-data, palett: his") 1), localhost 8889inbconverthimll.ogReg ipynb?download=tas 18. 28. B. 25. 2. 283333, 715067 430988 206324 935890 116667 975000 666667 083333 600000 033333 273579 807751 989440 816590 666667 4080 231 231 231 225 123 data[ "latitude" .plot-hist (bins=100) data[ ‘latitude’ ].value_counts() 4364 4345 408e 4043 309¢ 231 231 231 225 123 data[ "latitude" ].plot.hist (bins=1e@) LogReg ana 9122122, 752 PM BEERS EE ueysyy, 02667 355 0.5333 303 0.6667 233 .2933 219 . 3333 214 v3.e7 a azaa 1 sig 64.4991 1 23021 Name: usdprice, Length: 29493, dtype Data Wrangling 80 datacinfo() data[ ‘usdprice’ ].value_counts() RangelIndex: 172017 entries, @ to 172016 Data colunns (total 14 columns): 10 nn 2 B dtypes: float64(4), object (10) memory usage: 18.4+ MB Column date admint admin2 market latitude Longitude category conmodity unit priceflag pricetype currency price usdprice Non-Null Count 172017 171228 171228 172017 171228 171228 172017 172017 172017 172017 172017 172017 172017 172017 non-null non-null non-null non-null non-null non-null non-null non-null non-null non-null non-null non-null non-null non-null localhost 8889inbconverthimll.ogReg ipynb?download=tas Dtype object object object object Floated Floate4 object object object object object object float64 Float64 inte4 Logreg ona 9122122, 752 PM Logreg data.isnull() #checks missing data and result is in boolean format 4 172012 172013 172014 172015 172016 date False False False False False False False False False False admint False False False False False False False False False False admin2 False False False False False False False False False False 172017 rows x 14 columns data. isnull()-sum() date adnind admin2 market latitude Longitude category conmodity unit priceflag pricetype currency price usdprice dtyp inte4 78s 78s 788 788 sns.heatmap(data.isnul1()) localhost 8889inbconverthimll.ogReg ipynb?downloac=talse market False False False False False False False False False False latitude False False False False False False False False False False longitude category False False False False False False False False False False False False False False False False False False False False commodity False False False False False False False False False False unit False False False False False False False False False False riceflag False False False False False False False False False False pr ron 9722122, 752 PM LogReg data. dropna( inplac rue) sns.heatmap(data.isnul1()) data. isnul1()-sum() date adnind admin2 market latitude longitude category conmodity unit oriceflag aricetype currency localhost a#eginconvarthimll.ogReg inynb?downloac=false wns 9122122, 752 PM Logreg price e usdprice —@ dtype: inted ; X = data-iloc[:, :-1] y = data.iloc(:, -1) # split the dataset X_train, X test, y train, y test = train_test_split( X, y, test_size=0.75, randon_state=@) X train date admin2 market latitude longitude category commodity ut 15 cereal 10179504 Orisse Cuttack Cuttack 20.500000 95833333 and Rice | 2020 tubers te pulses Lentils 80356 94 Asan Kamrup Guwahati 26103333 91733338 FES = pulses enti 82141 05- Orisse__—Sundargarh-—~Rourkela 22260423 4953se Pulses enti a and nuts (masur) TE Andhre pulses asoe7 12 3Rfeur ishkbapaton Visakhapatram 17687530 S3218270 fe ents e tk and Mik 74695 11- Assam Kamup Guwahati 26.8333, 91733333, "Kan y 2017 jaty (pasteurized) goaze gr. Madhye Jabalpur Jabalpur 23.181467 79986407 Hand ol none Pradesh e pur fats (soybean) 15 ear ciland Ghee 96M ates Lucknow sucknow 26850000 angi6ee7 NE GI 153104 02: cujost Nomads —Semba stems Taazrane FAME | Ol 202 ‘armade Selambe 2 fats (sunflower) Ae uta land oil rie741 02. Uttar Varanasi Varanasi 25333333 83.000000 a tows soo Pradesh fats (unflower) e land Gh 4358807. Gujarat —Abmadabac Ahmedabad 23033333 ratse7 la ee sors fats (vanaspati) 42807 rows x 13 columns « » localhost 8889inbconverthimll.ogReg ipynb?downloac=talse vane 9122122, 752 PM Logreg y_train 101795 0.3657 80356 © 1.1028 2141 @.7052 146087 1.4236 74695 0.8581 98428 1.1741 96728 1.4628 153104 2.0667 118741 2.1018 43588 Name: usdprice, Length: 42807, dtype: floate4 localhost 8889inbconverthimll.ogReg ipynb?downloac=talse rata

You might also like