#!/usr/bin/env python
# coding: utf-8

# **Run the Cell to import the packages**

# In[1]:

import pandas as pd
import numpy as np
import dataframe as df

# **Data Loading**
# **Fill in the Command to load your CSV dataset "weather.csv" with pandas**

# In[2]:

# Read the weather dataset; a comma is already pandas' default delimiter.
weather = pd.read_csv('weather.csv')

# **Data Analysis**
#
# - Get the shape of the dataset and print it.
#
# - Get the column names in list and print it.
#
# - Describe the dataset to understand the basic statistics of the dataset.
#
# - Print the first three rows of the dataset

# In[5]:

# Dimensions of the dataset as a (rows, columns) tuple. `DataFrame.size`
# would instead return one number (rows * columns), which is not the shape
# the exercise asks for.
data_size = weather.shape

print(data_size)

# Column labels as a plain Python list rather than a pandas Index.
weather_col_names = list(weather.columns)

print(weather_col_names)

# Basic statistics of the dataset.
print(weather.describe())

# First three rows of the dataset.
print(weather.iloc[:3])

# **Target Identification**
#
# Execute the below cell to identify the target variable: whether or not it
# will rain tomorrow.

# In[6]:

# The label to predict is the RainTomorrow column (all rows).
weather_target = weather.loc[:, 'RainTomorrow']

print(weather_target)

# **Feature Identification**
#
# In our case by analyzing the dataset, we can understand that columns like
# **Date** might be irrelevant for predicting the target.
#
# Since **RainTomorrow** is our target variable, we will be removing it from the
# feature set.
#
# - Perform appropriate operation to drop the columns **Date** and
# **RainTomorrow**

# In[10]:

# Date is treated as irrelevant and RainTomorrow is the label itself, so
# neither belongs in the feature set.
cols_to_drop = ['Date', 'RainTomorrow']

# drop() returns a new frame without these columns; `weather` is not modified.
weather_feature = weather.drop(cols_to_drop, axis=1)

# head() shows the first five rows by default.
print(weather_feature.head())

# **Categorical Data**
#
# In order to identify the categorical variables in the data, use the following
# command in the below cell,

# In[11]:

# Select the object-dtype columns, which this exercise treats as the
# categorical variables.
weather_categorical = weather.select_dtypes(include=[object])

print(weather_categorical.head(15))

# **Convert to boolean**
#
# Assign the column **RainToday** for the variable **yes_no_cols** and run the
# below cell to print first 5 rows of **weather_feature**

# In[14]:

# Columns to convert to booleans.
yes_no_cols = ["RainToday"]

# Element-wise comparison: True where the cell equals 'Yes', False for any
# other value (including missing entries).
weather_feature[yes_no_cols] = weather_feature[yes_no_cols] == 'Yes'

print(weather_feature.head(5))

# **One Hot Encoding**
#
# Execute the below cells to perform **One Hot Encoding**

# In[15]:

# Expand each listed categorical column into one indicator column per
# category, prefixing the new column names with the source column name.
weather_dumm = pd.get_dummies(
    weather_feature,
    columns=["Location", "WindGustDir", "WindDir9am", "WindDir3pm"],
    prefix=["Location", "WindGustDir", "WindDir9am", "WindDir3pm"],
)

# `np.float` was only an alias of the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
weather_matrix = weather_dumm.values.astype(float)

# **Imputing-Missing Values**
#
# Do the Imputing-Missing Values by using the following parameters
#
# - missing_values=np.nan
# - strategy=mean
# - fill_value=None
# - verbose=0
# - copy=True
#

# In[16]:

from sklearn.impute import SimpleImputer

# Replace every NaN with the mean of its column.
# `verbose` is intentionally not passed: scikit-learn deprecated it in 1.1 and
# removed it in 1.3, where passing it raises TypeError. Its old default was 0,
# so omitting it gives the same behaviour on older releases.
imp = SimpleImputer(
    missing_values=np.nan,
    strategy='mean',
    fill_value=None,
    copy=True,
)

weather_matrix = imp.fit_transform(weather_matrix)

# **Standardization**
#
# Run the below cell to perform standardization

# In[17]:

from sklearn.preprocessing import StandardScaler

# Standardize the data by removing the mean and scaling to unit variance.
scaler = StandardScaler()

# Fit to data, then transform it.
weather_matrix = scaler.fit_transform(weather_matrix)

# **Train and Test Data**
#
# Splitting the data for training and testing (90% train, 10% test)
#
# - Perform train-test split on **weather_matrix** and **weather_target** with
# 90% as train data and 10% as test data and set random_state as seed.

# In[20]:

from sklearn.model_selection import train_test_split

# Fixed seed so the split (and the seeded models that reuse it) is repeatable.
seed = 5000

# 90% of the rows go to training and 10% to testing. The call is placed right
# after the `=` and wrapped inside its own parentheses, because an assignment
# cannot be continued onto the next line after a bare `=`.
train_data, test_data, train_label, test_label = train_test_split(
    weather_matrix,
    weather_target,
    train_size=.9,
    test_size=0.1,
    random_state=seed,
)

# **Support Vector Machine (SVM) Classification**
#
# - Initialize **SVM** classifier with following parameters
# - kernel = linear
# - C= 0.025
# - random_state=seed
#
# - Train the model with train_data and train_label
#
# - Now predict the output with test_data
#
# - Evaluate the classifier with score from test_data and test_label
#
# - Print the predicted score
#
#

# In[24]:

from sklearn.svm import SVC

# Build the linear-kernel SVM and train it in one step; fit() returns the
# estimator itself, so `classifier` is the fitted model.
classifier = SVC(kernel='linear', C=0.025, random_state=seed).fit(
    train_data, train_label
)

# Predicted labels for the held-out rows.
churn_predicted_target = classifier.predict(test_data)

# Score of the classifier on the held-out rows and their true labels.
score = classifier.score(test_data, test_label)

print('SVM Classifier : ', score)

# Save the SVM score to output.txt. The write call must be indented under the
# `with` statement so it runs while the file is open.
with open('output.txt', 'w') as file:
    file.write(str(np.mean(score)))

# **Random Forest Classifier**
#
# - Do the **Random Forest** Classifier of the Dataset using the following
# parameters.
# - max_depth=5
# - n_estimators=10
# - max_features=10
# - random_state=seed
#
# - Train the model with train_data and train_label.
#
# - Now predict the output with test_data.
#
# - Evaluate the classifier with score from test_data and test_label.

# In[26]:

from sklearn.ensemble import RandomForestClassifier

# Random forest configured with the parameters given in the exercise;
# random_state reuses the shared seed.
classifier = RandomForestClassifier(
    max_depth=5,
    n_estimators=10,
    max_features=10,
    random_state=seed,
)

# fit() returns the estimator itself, so rebinding keeps the same object.
classifier = classifier.fit(train_data, train_label)

# Predicted labels for the held-out rows.
churn_predicted_target = classifier.predict(test_data)

# Score of the classifier on the held-out rows and their true labels.
score = classifier.score(test_data, test_label)

print('Random Forest Classifier : ', score)

# Save the random forest score to output1.txt. The write call must be
# indented under the `with` statement so it runs while the file is open.
with open('output1.txt', 'w') as file:
    file.write(str(np.mean(score)))

# In[ ]: