#!/usr/bin/env python
# coding: utf-8
# **Run the Cell to import the packages**
# In[1]:
import pandas as pd
import numpy as np
import dataframe as df
# **Data Loading**
# **Fill in the Command to load your CSV dataset "weather.csv" with pandas**
# In[2]:
# Read the comma-separated weather dataset into a DataFrame.
weather = pd.read_csv(
    'weather.csv',
    sep=',',
)
# **Data Analysis**
#
# - Get the shape of the dataset and print it.
#
# - Get the column names as a list and print it.
#
# - Describe the dataset to understand the basic statistics of the dataset.
#
# - Print the first three rows of the dataset.
# In[5]:
# The task asks for the shape, i.e. the (rows, columns) tuple, not
# DataFrame.size, which is the total number of cells.
data_size = weather.shape
print(data_size)

# Turn the column Index into a plain Python list, as the task requests.
weather_col_names = list(weather.columns)
print(weather_col_names)

# Basic statistics as reported by DataFrame.describe().
print(weather.describe())

# First three rows of the dataset.
print(weather.head(3))
# **Target Identification**
#
# Execute the below cell to identify the target variable. If yes, it will rain
# tomorrow; otherwise it will not rain.
# In[6]:
# Pull out the label column that the classifiers will be trained to predict.
weather_target = weather.loc[:, 'RainTomorrow']
print(weather_target)
# **Feature Identification**
#
# In our case by analyzing the dataset, we can understand that the columns like
# **Date** might be irrelevant as they are not dependent on call usage pattern.
#
# Since **RainTomorrow** is our target variable, we will be removing it from the
# feature set.
#
# - Perform appropriate operation to drop the columns **Date** and
#   **RainTomorrow**
# In[10]:
# Build the feature set by removing the date column and the prediction target.
cols_to_drop = ['Date', 'RainTomorrow']
weather_feature = weather.drop(cols_to_drop, axis=1)
print(weather_feature.head())
# **Categorical Data**
#
# In order to identify the categorical variables in the data, use the following
# command in the below cell.
# In[11]:
# Select only the object-dtype columns of the raw dataset and show the
# first 15 rows of that subset.
weather_categorical = weather.select_dtypes(include=[object])
print(weather_categorical.head(15))
# **Convert to boolean**
#
# Assign the column **RainToday** for the variable **yes_no_cols** and run the
# below cell to print first 5 rows of **weather_feature**
#
# In[14]:
yes_no_cols = ["RainToday"]
# Element-wise comparison: a cell becomes True when it equals 'Yes' and
# False for any other value.
weather_feature[yes_no_cols] = weather_feature[yes_no_cols] == 'Yes'
print(weather_feature.head(5))
# **One Hot Encoding**
#
# Execute the below cells to perform **One Hot Encoding**
# In[15]:
# Expand each listed categorical column into one indicator column per
# category, prefixed with the original column name.
weather_dumm = pd.get_dummies(
    weather_feature,
    columns=["Location", "WindGustDir", "WindDir9am", "WindDir3pm"],
    prefix=["Location", "WindGustDir", "WindDir9am", "WindDir3pm"],
)
# np.float was only an alias of the builtin float and was removed in
# NumPy 1.24; the builtin gives the same float64 matrix on every version.
weather_matrix = weather_dumm.values.astype(float)
# **Imputing-Missing Values**
#
# Do the Imputing-Missing Values by using the following parameters
#
# - missing_values=np.nan
# - strategy=mean
# - fill_value=None
# - verbose=0
# - copy=True
#
# In[16]:
from sklearn.impute import SimpleImputer

# Replace every NaN with the mean of its column.
# `verbose` is deliberately not passed: it was deprecated in scikit-learn 1.1
# and removed in 1.3, where passing it raises TypeError. Its old default was
# 0, so omitting it behaves the same on older releases.
imp = SimpleImputer(
    missing_values=np.nan,
    strategy='mean',
    fill_value=None,
    copy=True,
)
weather_matrix = imp.fit_transform(weather_matrix)
# **Standardization**
#
# Run the below cell to perform standardization
# In[17]:
from sklearn.preprocessing import StandardScaler

# Standardize the data by removing the mean and scaling to unit variance.
scaler = StandardScaler()
# Fit to data, then transform it.
weather_matrix = scaler.fit_transform(weather_matrix)
# **Train and Test Data**
#
# Splitting the data for training and testing (90% train, 10% test)
#
# - Perform train-test split on **weather_matrix** and **weather_target** with
#   90% as train data and 10% as test data and set random_state as seed.
# In[20]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split (and the classifiers that reuse it) is reproducible.
seed = 5000
# 90% of the rows go to training and 10% to testing. The call is wrapped in
# its own parentheses so the statement can span several lines; a bare line
# break after `=` is a syntax error.
train_data, test_data, train_label, test_label = train_test_split(
    weather_matrix,
    weather_target,
    train_size=.9,
    test_size=0.1,
    random_state=seed,
)
# **SVM Classification**

#
# - Initialize **SVM** classifier with following parameters
# - kernel = linear
# - C= 0.025
# - random_state=seed
#
# - Train the model with train_data and train_label
#
# - Now predict the output with test_data
#
# - Evaluate the classifier with score from test_data and test_label
#
# - Print the predicted score
#
#
# In[24]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier with the parameters given in the task.
classifier = SVC(kernel='linear', C=0.025, random_state=seed)
classifier = classifier.fit(train_data, train_label)

# Predictions for the held-out rows (kept under the original variable name).
churn_predicted_target = classifier.predict(test_data)

# Score of the fitted classifier on the held-out data and labels.
score = classifier.score(test_data, test_label)
print('SVM Classifier : ', score)

# Persist the score; the write must be indented to sit inside the `with` block.
with open('output.txt', 'w') as file:
    file.write(str(np.mean(score)))
# **Random Forest Classifier**
#
# - Do the **Random Forest** Classifier of the Dataset using the following
#   parameters.
#     - max_depth=5
#     - n_estimators=10
#     - max_features=10
#     - random_state=seed
#
# - Train the model with train_data and train_label.
#
# - Now predict the output with test_data.
#
# - Evaluate the classifier with score from test_data and test_label.
# In[26]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with the parameters given in the task.
classifier = RandomForestClassifier(
    max_depth=5,
    n_estimators=10,
    max_features=10,
    random_state=seed,
)
classifier = classifier.fit(train_data, train_label)

# Predictions for the held-out rows (kept under the original variable name).
churn_predicted_target = classifier.predict(test_data)

# Score of the fitted classifier on the held-out data and labels.
score = classifier.score(test_data, test_label)
print('Random Forest Classifier : ', score)

# Persist the score; the write must be indented to sit inside the `with` block.
with open('output1.txt', 'w') as file:
    file.write(str(np.mean(score)))
# In[ ]:
Powered by TCPDF (www.tcpdf.org)

This Study Resource Was

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

This Study Resource Was

Uploaded by

Copyright:

Available Formats

#!

# **Run the Cell to import the packages**

weather = pd.read_csv('weather.csv', sep=',')

below cell to print first 5 rows of **weather_feature**

weather_feature[yes_no_cols] = weather_feature[yes_no_cols] == 'Yes'

# **One Hot Encoding**

# Execute the below cells to perform **One Hot Encoding**

from sklearn.impute import SimpleImputer

imp=SimpleImputer( missing_values=np.nan, strategy='mean' ,fill_value=None

#Fit to data, then transform it.

# **Train and Test Data**

# - Perform train-test split on **weather_matrix** and **weather_target** with

from sklearn.model_selection import train_test_split

# **Decision Tree Classification**

from sklearn.svm import SVC

classifier = SVC( kernel = 'linear', C= 0.025 ,random_state=seed

classifier = classifier.fit( train_data, train_label )

score = classifier.score( test_data, test_label )

print('SVM Classifier : ', score )

# - Evaluate the classifier with score from test_data and test_label.

from sklearn.ensemble import RandomForestClassifier

classifier = RandomForestClassifier( max_depth=5, n_estimators=10,

classifier = classifier.fit( train_data , train_label

score = classifier.score( test_data ,

print('Random Forest Classifier : ', score )

with open('output1.txt', 'w') as file:

You might also like

# Run the Cell to import the packages

below cell to print first 5 rows of weather_feature

# One Hot Encoding

# Execute the below cells to perform One Hot Encoding

# Train and Test Data

# - Perform train-test split on weather_matrix and weather_target with

# Decision Tree Classification