Professional Documents
Culture Documents
[24]:
import pandas as pd
import numpy as np
In [14]:
data = pd.read_csv('weatherAUS[1].csv')
data.head()
Out[14]: Date Location MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustDir WindGustSpeed WindDir9am ... Humidity9am Humidity3pm Pressure9am Pressure3pm Cloud9am Cloud3pm Temp9am Temp3pm Rai
2008-
0 Albury 13.4 22.9 0.6 NaN NaN W 44.0 W ... 71.0 22.0 1007.7 1007.1 8.0 NaN 16.9 21.8
12-01
2008-
1 Albury 7.4 25.1 0.0 NaN NaN WNW 44.0 NNW ... 44.0 25.0 1010.6 1007.8 NaN NaN 17.2 24.3
12-02
2008-
2 Albury 12.9 25.7 0.0 NaN NaN WSW 46.0 W ... 38.0 30.0 1007.6 1008.7 NaN 2.0 21.0 23.2
12-03
2008-
3 Albury 9.2 28.0 0.0 NaN NaN NE 24.0 SE ... 45.0 16.0 1017.6 1012.8 NaN NaN 18.1 26.5
12-04
2008-
4 Albury 17.5 32.3 1.0 NaN NaN W 41.0 ENE ... 82.0 33.0 1010.8 1006.0 7.0 8.0 17.8 29.7
12-05
5 rows × 23 columns
In [19]:
data_selected = data[['Date','Location','Rainfall','RainToday']]
data_selected.head()
In [23]:
data_selected.to_csv('selected_weatherAUS.csv',index =False)
In [25]:
data_selected.shape
Out[25]: (76113, 4)
In [28]:
data.describe()
Out[28]: MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustSpeed WindSpeed9am WindSpeed3pm Humidity9am Humidity3pm Pressure9am Pressure3pm Cloud9am Cloud3pm Temp9am
count 75001.000000 75175.000000 73884.000000 37280.000000 30464.000000 69991.000000 74732.000000 74118.000000 74292.000000 73864.000000 65523.000000 65576.000000 46036.000000 45078.000000 74632.000000
mean 11.271084 22.257790 2.360311 5.212366 7.327101 40.245032 13.952858 18.564532 71.145978 52.196022 1018.249551 1015.881810 4.619472 4.700963 15.808057
std 6.226131 6.935675 8.293909 4.149631 3.842799 14.389752 9.420504 9.390574 17.848594 20.730859 7.106027 6.979618 2.906949 2.691976 6.050791
min -8.500000 -4.800000 0.000000 0.000000 0.000000 7.000000 0.000000 0.000000 3.000000 1.000000 980.500000 979.000000 0.000000 0.000000 -7.200000
25% 6.800000 17.500000 0.000000 2.400000 4.400000 30.000000 7.000000 11.000000 59.000000 37.000000 1013.500000 1011.200000 1.000000 2.000000 11.500000
50% 11.400000 21.800000 0.000000 4.400000 8.100000 39.000000 13.000000 17.000000 72.000000 52.000000 1018.300000 1016.000000 6.000000 5.000000 16.000000
75% 16.100000 26.600000 0.600000 7.000000 10.400000 48.000000 19.000000 24.000000 85.000000 66.000000 1023.000000 1020.600000 7.000000 7.000000 20.300000
max 31.900000 47.300000 371.000000 145.000000 14.500000 135.000000 130.000000 83.000000 100.000000 100.000000 1040.600000 1037.900000 9.000000 8.000000 37.700000
In [30]:
data.corr()
Out[30]: MinTemp MaxTemp Rainfall Evaporation Sunshine WindGustSpeed WindSpeed9am WindSpeed3pm Humidity9am Humidity3pm Pressure9am Pressure3pm Cloud9am Cloud3pm Temp9am Temp3pm
MinTemp 1.000000 0.712368 0.101076 0.468007 0.012001 0.211898 0.157577 0.156315 -0.221007 0.020588 -0.411959 -0.392576 0.109352 0.050004 0.905862 0.683673
MaxTemp 0.712368 1.000000 -0.086991 0.572511 0.445459 0.071186 -0.065030 0.014988 -0.442877 -0.521236 -0.308863 -0.384533 -0.267748 -0.255201 0.866169 0.983932
Rainfall 0.101076 -0.086991 1.000000 -0.041697 -0.215746 0.134901 0.107412 0.063653 0.220614 0.256063 -0.133749 -0.090832 0.192255 0.155446 0.009227 -0.090058
Evaporation 0.468007 0.572511 -0.041697 1.000000 0.307476 0.218679 0.167871 0.104002 -0.460185 -0.338377 -0.270844 -0.277833 -0.135360 -0.119450 0.545178 0.551459
Sunshine 0.012001 0.445459 -0.215746 0.307476 1.000000 -0.059152 -0.050572 0.006324 -0.465811 -0.631798 0.041098 -0.010665 -0.687996 -0.704563 0.228259 0.469612
WindGustSpeed 0.211898 0.071186 0.134901 0.218679 -0.059152 1.000000 0.596458 0.711628 -0.238540 -0.007047 -0.484643 -0.442804 0.063316 0.111213 0.191275 0.034639
WindSpeed9am 0.157577 -0.065030 0.107412 0.167871 -0.050572 0.596458 1.000000 0.524098 -0.258571 0.034621 -0.230297 -0.158726 0.030655 0.083260 0.092322 -0.077964
WindSpeed3pm 0.156315 0.014988 0.063653 0.104002 0.006324 0.711628 0.524098 1.000000 -0.169321 0.013125 -0.318806 -0.271910 0.051159 0.038907 0.143146 -0.009731
Humidity9am -0.221007 -0.442877 0.220614 -0.460185 -0.465811 -0.238540 -0.258571 -0.169321 1.000000 0.621574 0.181231 0.202883 0.444933 0.319098 -0.440686 -0.433185
Humidity3pm 0.020588 -0.521236 0.256063 -0.338377 -0.631798 -0.007047 0.034621 0.013125 0.621574 1.000000 0.028144 0.098507 0.508282 0.508961 -0.205990 -0.567771
Pressure9am -0.411959 -0.308863 -0.133749 -0.270844 0.041098 -0.484643 -0.230297 -0.318806 0.181231 0.028144 1.000000 0.961782 -0.116110 -0.133249 -0.396565 -0.265186
Pressure3pm -0.392576 -0.384533 -0.090832 -0.277833 -0.010665 -0.442804 -0.158726 -0.271910 0.202883 0.098507 0.961782 1.000000 -0.064692 -0.085513 -0.415165 -0.349778
Cloud9am 0.109352 -0.267748 0.192255 -0.135360 -0.687996 0.063316 0.030655 0.051159 0.444933 0.508282 -0.116110 -0.064692 1.000000 0.570636 -0.106666 -0.284950
Cloud3pm 0.050004 -0.255201 0.155446 -0.119450 -0.704563 0.111213 0.083260 0.038907 0.319098 0.508961 -0.133249 -0.085513 0.570636 1.000000 -0.087493 -0.299348
Temp9am 0.905862 0.866169 0.009227 0.545178 0.228259 0.191275 0.092322 0.143146 -0.440686 -0.205990 -0.396565 -0.415165 -0.106666 -0.087493 1.000000 0.839755
Temp3pm 0.683673 0.983932 -0.090058 0.551459 0.469612 0.034639 -0.077964 -0.009731 -0.433185 -0.567771 -0.265186 -0.349778 -0.284950 -0.299348 0.839755 1.000000
In [32]:
import seaborn as sns
In [34]:
sns.distplot(data['MinTemp'])
Out[34]: <AxesSubplot:xlabel='MinTemp'>
In [53]:
data1 = data.dropna()
In [54]:
sns.distplot(data1['Rainfall'])
Out[54]: <AxesSubplot:xlabel='Rainfall'>
In [43]:
sns.heatmap(data.corr())
Out[43]: <AxesSubplot:>
In [50]:
data.pairplot(['Rainfall'],['MaxTemp'])
---------------------------------------------------------------------------
<ipython-input-50-5214aaeebfce> in <module>
----> 1 data.pairplot(['Rainfall'],['MaxTemp'])
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5275
In [57]:
data.plot.scatter('MinTemp','MaxTemp')
In [59]:
data.plot.scatter('MinTemp','Sunshine')
In [61]:
data['MaxTemp'].unique()
Out[61]: array([22.9, 25.1, 25.7, 28. , 32.3, 29.7, 25. , 26.7, 31.9, 30.1, 30.4,
21.7, 18.6, 21. , 24.6, 27.7, 20.9, 22.5, 25.6, 29.3, 33. , 31.8,
30.9, 32.4, 33.9, 32.7, 27.2, 24.2, 24.4, 26.5, 23.9, 28.8, 34.6,
35.8, 37.9, 38.9, 28.3, 28.4, 30.8, 32. , 34.7, 37.7, 43. , 32.2,
36.6, 39.9, 38.1, 34. , 35.2, 30.6, 34.3, 38.4, 38.2, 40.7, 41.5,
42.9, 42.7, 43.1, 38.3, 36.9, 41.2, 42.2, 44.8, 40.2, 31.2, 27.3,
21.6, 29. , 29.2, 31.3, 31.1, 29.1, 31.7, 33.2, 29.6, 32.6, 34.5,
30.3, 22.1, 22. , 24. , 27.9, 30.2, 30.5, 25.8, 27. , 19.7, 21.9,
25.3, 33.5, 33.6, 30. , 31.6, 22.3, 29.8, 27.6, 28.9, 29.9, 32.1,
28.1, 21.4, 24.9, 25.4, 20. , 23.4, 22.7, 16.2, 15.8, 12.9, 11.5,
14.5, 12.2, 16.5, 17. , 19.2, 18.9, 19.1, 18.8, 19.3, 18.4, 19. ,
20.5, 19.5, 17.7, 18.5, 15.1, 16.3, 16.6, 16.4, 15.6, 19.8, 21.1,
20.3, 18.1, 15.7, 16.8, 17.1, 14.3, 13.4, 17.4, 16.1, 10.5, 11.6,
12. , 8.8, 9.6, 8.2, 12.4, 14.9, 15.9, 14.7, 15.3, 17.3, 15.5,
14.1, 13.7, 11.9, 12.3, 13.3, 13.2, 12.1, 12.5, 13.8, 15. , 13.5,
13.1, 11. , 12.6, 14.4, 9.8, 12.7, 13.9, 14.8, 15.2, 17.5, 13.6,
12.8, 16.7, 17.9, 11.1, 14.2, 20.7, 16.9, 17.6, nan, 24.7, 17.8,
17.2, 20.4, 20.6, 20.2, 18.2, 18.7, 23.3, 26.6, 27.1, 23.1, 24.1,
26.8, 26.9, 24.3, 23.2, 33.4, 36. , 35.1, 32.8, 35.4, 36.3, 39.7,
23.6, 32.9, 26.2, 26.4, 31.5, 27.5, 30.7, 27.4, 22.6, 38.6, 40.3,
33.7, 35.9, 36.8, 33.8, 36.4, 39.8, 42.4, 25.2, 37.3, 41.8, 34.4,
36.5, 35.6, 26.1, 35. , 28.5, 31. , 26.3, 25.5, 22.4, 28.6, 31.4,
28.7, 24.8, 19.9, 23.5, 25.9, 23. , 10.2, 14. , 8.4, 8.3, 9.4,
10.9, 10.3, 10.8, 14.6, 15.4, 10. , 10.1, 11.8, 16. , 10.4, 13. ,
11.7, 10.7, 19.6, 20.1, 20.8, 23.7, 18. , 21.8, 22.2, 24.5, 29.4,
21.2, 32.5, 28.2, 21.5, 22.8, 11.2, 19.4, 18.3, 9.2, 9.3, 21.3,
23.8, 26. , 27.8, 33.1, 34.9, 36.1, 36.2, 35.3, 35.5, 9.7, 10.6,
11.3, 11.4, 36.7, 37.4, 43.4, 42. , 40.4, 39.2, 38.8, 37. , 35.7,
37.1, 29.5, 37.6, 40.5, 39. , 43.6, 42.6, 34.2, 33.3, 40.6, 41.7,
41.6, 41.1, 40.1, 8.7, 39.6, 34.1, 9.9, 8.6, 9.1, 6.8, 9.5,
9. , 37.2, 34.8, 40. , 38. , 39.5, 40.9, 38.5, 7.5, 39.3, 42.5,
43.7, 37.8, 38.7, 41.9, 39.4, 41.4, 37.5, 45.8, 41. , 39.1, 40.8,
42.8, 45.1, 44.4, 43.5, 42.1, 46.4, 43.9, 45.4, 41.3, 44.3, 46. ,
43.8, 43.2, 45.2, 8.5, 42.3, 7.8, 45.7, 46.6, 47.3, 44.5, 44.1,
44. , 43.3, 46.5, 44.9, 46.9, 45.3, 44.6, 47. , 8.9, 8.1, 7.6,
5.7, 5.2, 5.5, 4.7, 6.5, 2.5, 2.7, 3.4, 2.9, 5. , 3.2,
3.9, 0.4, 3.8, 5.9, -0.7, -1.8, -0.2, 0.9, -1.3, 1.8, 3.1,
6.7, 4.3, -2.4, -0.3, 4.6, 6.9, -2.5, 2.1, 1.4, 5.6, 3.6,
4.4, 5.4, -2.7, -1. , 5.8, 4.8, 1.1, 1.3, -1.4, -0.8, -2. ,
4.5, -0.4, 1.7, -0.9, -2.1, -1.5, -3.1, 6.1, -1.2, 6.4, 3.7,
-1.1, 1.5, -1.9, 0.8, -2.3, -3.7, -2.9, -3.8, 0.7, -4.1, -3.2,
In [63]:
data['MaxTemp'].nunique()
Out[63]: 500
In [25]:
data.isna().sum()
Out[25]: Date 0
Location 0
Rainfall 0
RainToday 0
dtype: int64