You are on page 1of 1

In 

[24]:
import pandas as pd

from scipy import stats

import numpy as np

import matplotlib.pyplot as plt

In [14]:
data = pd.read_csv('weatherAUS[1].csv')

data.head()

Out[14]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir WindGustSpeed WindDir9am ... Humidity9am Humidity3pm Pressure9am Pressure3pm Cloud9am Cloud3pm Temp9am Temp3pm Rai

2008-
0 Albury 13.4 22.9 0.6 NaN NaN W 44.0 W ... 71.0 22.0 1007.7 1007.1 8.0 NaN 16.9 21.8
12-01

2008-
1 Albury 7.4 25.1 0.0 NaN NaN WNW 44.0 NNW ... 44.0 25.0 1010.6 1007.8 NaN NaN 17.2 24.3
12-02

2008-
2 Albury 12.9 25.7 0.0 NaN NaN WSW 46.0 W ... 38.0 30.0 1007.6 1008.7 NaN 2.0 21.0 23.2
12-03

2008-
3 Albury 9.2 28.0 0.0 NaN NaN NE 24.0 SE ... 45.0 16.0 1017.6 1012.8 NaN NaN 18.1 26.5
12-04

2008-
4 Albury 17.5 32.3 1.0 NaN NaN W 41.0 ENE ... 82.0 33.0 1010.8 1006.0 7.0 8.0 17.8 29.7
12-05

5 rows × 23 columns

In [19]:
data_selected = data[['Date','Location','Rainfall','RainToday']]

data_selected.head()

Out[19]: Date Location Rainfall RainToday

0 2008-12-01 Albury 0.6 No

1 2008-12-02 Albury 0.0 No

2 2008-12-03 Albury 0.0 No

3 2008-12-04 Albury 0.0 No

4 2008-12-05 Albury 1.0 No

In [23]:
data_selected.to_csv('selected_weatherAUS.csv',index =False)

In [25]:
data_selected.shape

Out[25]: (76113, 4)

In [28]:
data.describe()

Out[28]: MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustSpeed WindSpeed9am WindSpeed3pm Humidity9am Humidity3pm Pressure9am Pressure3pm Cloud9am Cloud3pm Temp9am

count 75001.000000 75175.000000 73884.000000 37280.000000 30464.000000 69991.000000 74732.000000 74118.000000 74292.000000 73864.000000 65523.000000 65576.000000 46036.000000 45078.000000 74632.000000

mean 11.271084 22.257790 2.360311 5.212366 7.327101 40.245032 13.952858 18.564532 71.145978 52.196022 1018.249551 1015.881810 4.619472 4.700963 15.808057

std 6.226131 6.935675 8.293909 4.149631 3.842799 14.389752 9.420504 9.390574 17.848594 20.730859 7.106027 6.979618 2.906949 2.691976 6.050791

min -8.500000 -4.800000 0.000000 0.000000 0.000000 7.000000 0.000000 0.000000 3.000000 1.000000 980.500000 979.000000 0.000000 0.000000 -7.200000

25% 6.800000 17.500000 0.000000 2.400000 4.400000 30.000000 7.000000 11.000000 59.000000 37.000000 1013.500000 1011.200000 1.000000 2.000000 11.500000

50% 11.400000 21.800000 0.000000 4.400000 8.100000 39.000000 13.000000 17.000000 72.000000 52.000000 1018.300000 1016.000000 6.000000 5.000000 16.000000

75% 16.100000 26.600000 0.600000 7.000000 10.400000 48.000000 19.000000 24.000000 85.000000 66.000000 1023.000000 1020.600000 7.000000 7.000000 20.300000

max 31.900000 47.300000 371.000000 145.000000 14.500000 135.000000 130.000000 83.000000 100.000000 100.000000 1040.600000 1037.900000 9.000000 8.000000 37.700000

In [30]:
data.corr()

Out[30]: MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustSpeed WindSpeed9am WindSpeed3pm Humidity9am Humidity3pm Pressure9am Pressure3pm Cloud9am Cloud3pm Temp9am Temp3pm

MinTemp 1.000000 0.712368 0.101076 0.468007 0.012001 0.211898 0.157577 0.156315 -0.221007 0.020588 -0.411959 -0.392576 0.109352 0.050004 0.905862 0.683673

MaxTemp 0.712368 1.000000 -0.086991 0.572511 0.445459 0.071186 -0.065030 0.014988 -0.442877 -0.521236 -0.308863 -0.384533 -0.267748 -0.255201 0.866169 0.983932

Rainfall 0.101076 -0.086991 1.000000 -0.041697 -0.215746 0.134901 0.107412 0.063653 0.220614 0.256063 -0.133749 -0.090832 0.192255 0.155446 0.009227 -0.090058

Evaporation 0.468007 0.572511 -0.041697 1.000000 0.307476 0.218679 0.167871 0.104002 -0.460185 -0.338377 -0.270844 -0.277833 -0.135360 -0.119450 0.545178 0.551459

Sunshine 0.012001 0.445459 -0.215746 0.307476 1.000000 -0.059152 -0.050572 0.006324 -0.465811 -0.631798 0.041098 -0.010665 -0.687996 -0.704563 0.228259 0.469612

WindGustSpeed 0.211898 0.071186 0.134901 0.218679 -0.059152 1.000000 0.596458 0.711628 -0.238540 -0.007047 -0.484643 -0.442804 0.063316 0.111213 0.191275 0.034639

WindSpeed9am 0.157577 -0.065030 0.107412 0.167871 -0.050572 0.596458 1.000000 0.524098 -0.258571 0.034621 -0.230297 -0.158726 0.030655 0.083260 0.092322 -0.077964

WindSpeed3pm 0.156315 0.014988 0.063653 0.104002 0.006324 0.711628 0.524098 1.000000 -0.169321 0.013125 -0.318806 -0.271910 0.051159 0.038907 0.143146 -0.009731

Humidity9am -0.221007 -0.442877 0.220614 -0.460185 -0.465811 -0.238540 -0.258571 -0.169321 1.000000 0.621574 0.181231 0.202883 0.444933 0.319098 -0.440686 -0.433185

Humidity3pm 0.020588 -0.521236 0.256063 -0.338377 -0.631798 -0.007047 0.034621 0.013125 0.621574 1.000000 0.028144 0.098507 0.508282 0.508961 -0.205990 -0.567771

Pressure9am -0.411959 -0.308863 -0.133749 -0.270844 0.041098 -0.484643 -0.230297 -0.318806 0.181231 0.028144 1.000000 0.961782 -0.116110 -0.133249 -0.396565 -0.265186

Pressure3pm -0.392576 -0.384533 -0.090832 -0.277833 -0.010665 -0.442804 -0.158726 -0.271910 0.202883 0.098507 0.961782 1.000000 -0.064692 -0.085513 -0.415165 -0.349778

Cloud9am 0.109352 -0.267748 0.192255 -0.135360 -0.687996 0.063316 0.030655 0.051159 0.444933 0.508282 -0.116110 -0.064692 1.000000 0.570636 -0.106666 -0.284950

Cloud3pm 0.050004 -0.255201 0.155446 -0.119450 -0.704563 0.111213 0.083260 0.038907 0.319098 0.508961 -0.133249 -0.085513 0.570636 1.000000 -0.087493 -0.299348

Temp9am 0.905862 0.866169 0.009227 0.545178 0.228259 0.191275 0.092322 0.143146 -0.440686 -0.205990 -0.396565 -0.415165 -0.106666 -0.087493 1.000000 0.839755

Temp3pm 0.683673 0.983932 -0.090058 0.551459 0.469612 0.034639 -0.077964 -0.009731 -0.433185 -0.567771 -0.265186 -0.349778 -0.284950 -0.299348 0.839755 1.000000

In [32]:
import seaborn as sns

In [34]:
sns.distplot(data['MinTemp'])

Out[34]: <AxesSubplot:xlabel='MinTemp'>

In [53]:
data1 = data.dropna()

In [54]:
sns.distplot(data1['Rainfall'])

Out[54]: <AxesSubplot:xlabel='Rainfall'>

In [43]:
sns.heatmap(data.corr())

Out[43]: <AxesSubplot:>

In [50]:
data.pairplot(['Rainfall'],['MaxTemp'])

---------------------------------------------------------------------------

AttributeError Traceback (most recent call last)

<ipython-input-50-5214aaeebfce> in <module>

----> 1 data.pairplot(['Rainfall'],['MaxTemp'])

/usr/local/lib/python3.7/site-packages/pandas/core/generic.py in __getattr__(self, name)

5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):

5273 return self[name]

-> 5274 return object.__getattribute__(self, name)

5275

5276 def __setattr__(self, name: str, value) -> None:

AttributeError: 'DataFrame' object has no attribute 'pairplot'

In [57]:
data.plot.scatter('MinTemp','MaxTemp')

Out[57]: <AxesSubplot:xlabel='MinTemp', ylabel='MaxTemp'>

In [59]:
data.plot.scatter('MinTemp','Sunshine')

Out[59]: <AxesSubplot:xlabel='MinTemp', ylabel='Sunshine'>

In [61]:
data['MaxTemp'].unique()

Out[61]: array([22.9, 25.1, 25.7, 28. , 32.3, 29.7, 25. , 26.7, 31.9, 30.1, 30.4,

21.7, 18.6, 21. , 24.6, 27.7, 20.9, 22.5, 25.6, 29.3, 33. , 31.8,

30.9, 32.4, 33.9, 32.7, 27.2, 24.2, 24.4, 26.5, 23.9, 28.8, 34.6,

35.8, 37.9, 38.9, 28.3, 28.4, 30.8, 32. , 34.7, 37.7, 43. , 32.2,

36.6, 39.9, 38.1, 34. , 35.2, 30.6, 34.3, 38.4, 38.2, 40.7, 41.5,

42.9, 42.7, 43.1, 38.3, 36.9, 41.2, 42.2, 44.8, 40.2, 31.2, 27.3,

21.6, 29. , 29.2, 31.3, 31.1, 29.1, 31.7, 33.2, 29.6, 32.6, 34.5,

30.3, 22.1, 22. , 24. , 27.9, 30.2, 30.5, 25.8, 27. , 19.7, 21.9,

25.3, 33.5, 33.6, 30. , 31.6, 22.3, 29.8, 27.6, 28.9, 29.9, 32.1,

28.1, 21.4, 24.9, 25.4, 20. , 23.4, 22.7, 16.2, 15.8, 12.9, 11.5,

14.5, 12.2, 16.5, 17. , 19.2, 18.9, 19.1, 18.8, 19.3, 18.4, 19. ,

20.5, 19.5, 17.7, 18.5, 15.1, 16.3, 16.6, 16.4, 15.6, 19.8, 21.1,

20.3, 18.1, 15.7, 16.8, 17.1, 14.3, 13.4, 17.4, 16.1, 10.5, 11.6,

12. , 8.8, 9.6, 8.2, 12.4, 14.9, 15.9, 14.7, 15.3, 17.3, 15.5,

14.1, 13.7, 11.9, 12.3, 13.3, 13.2, 12.1, 12.5, 13.8, 15. , 13.5,

13.1, 11. , 12.6, 14.4, 9.8, 12.7, 13.9, 14.8, 15.2, 17.5, 13.6,

12.8, 16.7, 17.9, 11.1, 14.2, 20.7, 16.9, 17.6, nan, 24.7, 17.8,

17.2, 20.4, 20.6, 20.2, 18.2, 18.7, 23.3, 26.6, 27.1, 23.1, 24.1,

26.8, 26.9, 24.3, 23.2, 33.4, 36. , 35.1, 32.8, 35.4, 36.3, 39.7,

23.6, 32.9, 26.2, 26.4, 31.5, 27.5, 30.7, 27.4, 22.6, 38.6, 40.3,

33.7, 35.9, 36.8, 33.8, 36.4, 39.8, 42.4, 25.2, 37.3, 41.8, 34.4,

36.5, 35.6, 26.1, 35. , 28.5, 31. , 26.3, 25.5, 22.4, 28.6, 31.4,

28.7, 24.8, 19.9, 23.5, 25.9, 23. , 10.2, 14. , 8.4, 8.3, 9.4,

10.9, 10.3, 10.8, 14.6, 15.4, 10. , 10.1, 11.8, 16. , 10.4, 13. ,

11.7, 10.7, 19.6, 20.1, 20.8, 23.7, 18. , 21.8, 22.2, 24.5, 29.4,

21.2, 32.5, 28.2, 21.5, 22.8, 11.2, 19.4, 18.3, 9.2, 9.3, 21.3,

23.8, 26. , 27.8, 33.1, 34.9, 36.1, 36.2, 35.3, 35.5, 9.7, 10.6,

11.3, 11.4, 36.7, 37.4, 43.4, 42. , 40.4, 39.2, 38.8, 37. , 35.7,

37.1, 29.5, 37.6, 40.5, 39. , 43.6, 42.6, 34.2, 33.3, 40.6, 41.7,

41.6, 41.1, 40.1, 8.7, 39.6, 34.1, 9.9, 8.6, 9.1, 6.8, 9.5,

9. , 37.2, 34.8, 40. , 38. , 39.5, 40.9, 38.5, 7.5, 39.3, 42.5,

43.7, 37.8, 38.7, 41.9, 39.4, 41.4, 37.5, 45.8, 41. , 39.1, 40.8,

42.8, 45.1, 44.4, 43.5, 42.1, 46.4, 43.9, 45.4, 41.3, 44.3, 46. ,

43.8, 43.2, 45.2, 8.5, 42.3, 7.8, 45.7, 46.6, 47.3, 44.5, 44.1,

44. , 43.3, 46.5, 44.9, 46.9, 45.3, 44.6, 47. , 8.9, 8.1, 7.6,

6.3, 8. , 6.6, 45. , 45.5, 4.1, 7.7, 7.1, 7.9, 7. , 7.3,

2.6, 7.4, 6.2, 7.2, 6. , 4.9, -1.7, 0. , 2.4, 5.3, 2.3,

5.7, 5.2, 5.5, 4.7, 6.5, 2.5, 2.7, 3.4, 2.9, 5. , 3.2,

0.1, -0.1, -2.2, -0.6, 0.6, 2.2, 4. , 3.5, 3. , 4.2, 3.3,

3.9, 0.4, 3.8, 5.9, -0.7, -1.8, -0.2, 0.9, -1.3, 1.8, 3.1,

1.6, 0.3, 1.2, -0.5, 1. , 0.5, 0.2, 1.9, 2. , 5.1, 2.8,

6.7, 4.3, -2.4, -0.3, 4.6, 6.9, -2.5, 2.1, 1.4, 5.6, 3.6,

4.4, 5.4, -2.7, -1. , 5.8, 4.8, 1.1, 1.3, -1.4, -0.8, -2. ,

4.5, -0.4, 1.7, -0.9, -2.1, -1.5, -3.1, 6.1, -1.2, 6.4, 3.7,

-1.1, 1.5, -1.9, 0.8, -2.3, -3.7, -2.9, -3.8, 0.7, -4.1, -3.2,

-3. , -4.8, 44.2, 46.8, 46.7, 46.3])

In [63]:
data['MaxTemp'].nunique()

Out[63]: 500

In [25]:
data.isna().sum()

Out[25]: Date 0

Location 0

Rainfall 0

RainToday 0

dtype: int64

You might also like