
CHEAT SHEET

Machine Learning Algorithms
(Python and R Codes)

Types

Supervised Learning: Decision Tree, Random Forest, kNN, Logistic Regression
Unsupervised Learning: Apriori algorithm, k-means, Hierarchical Clustering
Reinforcement Learning: Markov Decision Process, Q Learning

Linear Regression

Python Code:

#Import Library
#Import other necessary libraries like pandas, numpy...
from sklearn import linear_model
#Load Train and Test datasets
#Identify feature and response variable(s) and
#values must be numeric and numpy arrays
x_train=input_variables_values_training_datasets
y_train=target_variables_values_training_datasets
x_test=input_variables_values_test_datasets
#Create linear regression object
linear = linear_model.LinearRegression()
#Train the model using the training sets and
#check score
linear.fit(x_train, y_train)
linear.score(x_train, y_train)
#Equation coefficient and Intercept
print('Coefficient: \n', linear.coef_)
print('Intercept: \n', linear.intercept_)
#Predict Output
predicted= linear.predict(x_test)

R Code:

#Load Train and Test datasets
#Identify feature and response variable(s) and
#values must be numeric and numpy arrays
x_train <- input_variables_values_training_datasets
y_train <- target_variables_values_training_datasets
x_test <- input_variables_values_test_datasets
x <- cbind(x_train,y_train)
#Train the model using the training sets and
#check score
linear <- lm(y_train ~ ., data = x)
summary(linear)
#Predict Output
predicted= predict(linear,x_test)
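
For a concrete run of the Python snippet above, here is a minimal self-contained sketch; the tiny dataset is made up purely for illustration (y is roughly 2x + 1):

import numpy as np
from sklearn import linear_model
#Made-up training data: y is roughly 2*x + 1
x_train = np.array([[1.0], [2.0], [3.0], [4.0]])
y_train = np.array([3.1, 4.9, 7.2, 8.8])
x_test = np.array([[5.0]])
linear = linear_model.LinearRegression()
linear.fit(x_train, y_train)
print(linear.score(x_train, y_train))   #R^2 on the training data
print(linear.coef_, linear.intercept_)  #close to 2 and 1
print(linear.predict(x_test))           #close to 11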

Logistic Regression

Python Code:

#Import Library
from sklearn.linear_model import LogisticRegression
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create logistic regression object
model = LogisticRegression()
#Train the model using the training sets
#and check score
model.fit(X, y)
model.score(X, y)
#Equation coefficient and Intercept
print('Coefficient: \n', model.coef_)
print('Intercept: \n', model.intercept_)
#Predict Output
predicted= model.predict(x_test)

R Code:

x <- cbind(x_train,y_train)
#Train the model using the training sets and
#check score
logistic <- glm(y_train ~ ., data = x, family='binomial')
summary(logistic)
#Predict Output
predicted= predict(logistic,x_test)
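
Beyond hard labels, scikit-learn's LogisticRegression also exposes class probabilities through predict_proba (the rough counterpart of R's predict(..., type="response")). A minimal sketch on made-up data:

import numpy as np
from sklearn.linear_model import LogisticRegression
#Made-up 1-D data: small values are class 0, large values class 1
X = np.array([[0.5], [1.0], [1.5], [3.0], [3.5], [4.0]])
y = np.array([0, 0, 0, 1, 1, 1])
model = LogisticRegression().fit(X, y)
print(model.predict([[2.0]]))        #predicted class label
print(model.predict_proba([[2.0]]))  #[P(class 0), P(class 1)]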

Decision Tree

Python Code:

#Import Library
#Import other necessary libraries like pandas, numpy...
from sklearn import tree
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create tree object
model = tree.DecisionTreeClassifier(criterion='gini')
#for classification, you can set the criterion to
#gini or entropy (information gain); by default it is gini
#model = tree.DecisionTreeRegressor() for regression
#Train the model using the training sets and check
#score
model.fit(X, y)
model.score(X, y)
#Predict Output
predicted= model.predict(x_test)

R Code:

#Import Library
library(rpart)
x <- cbind(x_train,y_train)
#grow tree
fit <- rpart(y_train ~ ., data = x, method="class")
summary(fit)
#Predict Output
predicted= predict(fit,x_test)
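
The commented-out DecisionTreeRegressor line above is the regression counterpart; a minimal sketch of it on made-up data (max_depth=2 is purely illustrative):

import numpy as np
from sklearn import tree
#Made-up data with a continuous target
X = np.array([[1.0], [2.0], [3.0], [4.0]])
y = np.array([1.5, 3.1, 4.4, 6.2])
model = tree.DecisionTreeRegressor(max_depth=2)
model.fit(X, y)
print(model.predict([[2.5]]))
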
SVM (Support Vector Machine)

Python Code:

#Import Library
from sklearn import svm
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create SVM classification object
model = svm.SVC()
#there are various options associated with it,
#this is simple for classification.
#Train the model using the training sets and check
#score
model.fit(X, y)
model.score(X, y)
#Predict Output
predicted= model.predict(x_test)

R Code:

#Import Library
library(e1071)
x <- cbind(x_train,y_train)
#Fitting model
fit <- svm(y_train ~ ., data = x)
summary(fit)
#Predict Output
predicted= predict(fit,x_test)
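
Among the "various options" mentioned above, the kernel matters most in practice; a minimal sketch comparing a linear and an RBF kernel on a made-up XOR-style dataset (the C value is illustrative):

import numpy as np
from sklearn import svm
#Made-up XOR-style data: not linearly separable
X = np.array([[0, 0], [1, 1], [1, 0], [0, 1]])
y = np.array([0, 0, 1, 1])
for kernel in ('linear', 'rbf'):
    model = svm.SVC(kernel=kernel, C=1.0)
    model.fit(X, y)
    print(kernel, model.score(X, y))  #compare training accuracy per kernel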

Naive Bayes

Python Code:

#Import Library
from sklearn.naive_bayes import GaussianNB
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create Gaussian Naive Bayes object
model = GaussianNB()
#there are other distributions for multinomial classes,
#like Bernoulli Naive Bayes
#Train the model using the training sets and check
#score
model.fit(X, y)
#Predict Output
predicted= model.predict(x_test)

R Code:

#Import Library
library(e1071)
x <- cbind(x_train,y_train)
#Fitting model
fit <- naiveBayes(y_train ~ ., data = x)
summary(fit)
#Predict Output
predicted= predict(fit,x_test)
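
For the binary-feature case hinted at above, scikit-learn also ships BernoulliNB (and MultinomialNB for count features); a minimal sketch on made-up 0/1 features:

import numpy as np
from sklearn.naive_bayes import BernoulliNB
#Made-up binary feature matrix and labels
X = np.array([[1, 0, 1], [1, 1, 0], [0, 0, 1], [0, 1, 1]])
y = np.array([1, 1, 0, 0])
model = BernoulliNB()
model.fit(X, y)
print(model.predict([[1, 0, 0]]))
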
kNN (k-Nearest Neighbors)

Python Code:

#Import Library
from sklearn.neighbors import KNeighborsClassifier
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create KNeighbors classifier object
model = KNeighborsClassifier(n_neighbors=6)
#default value for n_neighbors is 5
#Train the model using the training sets and check score
model.fit(X, y)
#Predict Output
predicted= model.predict(x_test)

R Code:

#Import Library
#kNN lives in the class package; knn() classifies the
#test set directly, so there is no separate fitting step
library(class)
#Predict Output
predicted <- knn(train = x_train, test = x_test, cl = y_train, k = 5)
summary(predicted)
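
Because n_neighbors controls the bias/variance trade-off, a common way to choose it is cross-validation; a minimal sketch using scikit-learn's cross_val_score on the built-in iris dataset (the candidate k values are illustrative):

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
X, y = load_iris(return_X_y=True)
for k in (1, 5, 15):
    model = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(model, X, y, cv=5)
    print(k, scores.mean())  #mean 5-fold accuracy for each k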

k-Means

Python Code:

#Import Library
from sklearn.cluster import KMeans
#Assumed you have, X (attributes) for training data set
#and x_test(attributes) of test_dataset
#Create KMeans object
model = KMeans(n_clusters=3, random_state=0)
#Train the model using the training sets and check score
model.fit(X)
#Predict Output
predicted= model.predict(x_test)

R Code:

#Import Library
library(cluster)
#3 cluster solution
fit <- kmeans(X, 3)
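
After fitting, the cluster assignments and centers are available on the model object; a minimal sketch with made-up 2-D points (n_init is set explicitly only to keep the run deterministic across versions):

import numpy as np
from sklearn.cluster import KMeans
#Made-up points forming two obvious groups
X = np.array([[1.0, 1.0], [1.5, 2.0], [8.0, 8.0], [8.5, 9.0]])
model = KMeans(n_clusters=2, random_state=0, n_init=10)
model.fit(X)
print(model.labels_)           #cluster index of each training point
print(model.cluster_centers_)  #coordinates of the two centers
print(model.predict([[0.0, 0.0]]))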

Random Forest

Python Code:

#Import Library
from sklearn.ensemble import RandomForestClassifier
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create Random Forest object
model= RandomForestClassifier()
#Train the model using the training sets and check score
model.fit(X, y)
#Predict Output
predicted= model.predict(x_test)

R Code:

#Import Library
library(randomForest)
x <- cbind(x_train,y_train)
#Fitting model
fit <- randomForest(y_train ~ ., data = x, ntree=500)
summary(fit)
#Predict Output
predicted= predict(fit,x_test)
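
A fitted forest also reports how much each feature contributed (the R analogue is importance(fit)); a minimal sketch on the built-in iris dataset:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
X, y = load_iris(return_X_y=True)
model = RandomForestClassifier(n_estimators=100, random_state=0)
model.fit(X, y)
print(model.feature_importances_)  #one importance score per feature
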
Dimensionality Reduction Algorithms

Python Code:

#Import Library
from sklearn import decomposition
#Assumed you have training and test data set as train
#and test
#Create PCA object
pca= decomposition.PCA(n_components=k)
#default value of k = min(n_sample, n_features)
#For Factor analysis
#fa= decomposition.FactorAnalysis()
#Reduce the dimension of training dataset using PCA
train_reduced = pca.fit_transform(train)
#Reduce the dimension of test dataset
test_reduced = pca.transform(test)

R Code:

#Import Library
library(stats)
pca <- princomp(train, cor = TRUE)
train_reduced <- predict(pca,train)
test_reduced <- predict(pca,test)
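
To decide how many components k to keep, a standard check is the explained variance per component; a minimal sketch on the built-in iris dataset (n_components=2 is illustrative):

from sklearn import decomposition
from sklearn.datasets import load_iris
train, _ = load_iris(return_X_y=True)
pca = decomposition.PCA(n_components=2)
train_reduced = pca.fit_transform(train)
print(pca.explained_variance_ratio_)  #fraction of variance per component
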
Gradient Boosting & AdaBoost

Python Code:

#Import Library
from sklearn.ensemble import GradientBoostingClassifier
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create Gradient Boosting Classifier object
model= GradientBoostingClassifier(n_estimators=100,
        learning_rate=1.0, max_depth=1, random_state=0)
#Train the model using the training sets and check score
model.fit(X, y)
#Predict Output
predicted= model.predict(x_test)

R Code:

#Import Library
library(caret)
x <- cbind(x_train,y_train)
#Fitting model
fitControl <- trainControl(method = "repeatedcv", number = 4, repeats = 4)
fit <- train(y_train ~ ., data = x, method = "gbm",
             trControl = fitControl, verbose = FALSE)
predicted= predict(fit,x_test,type= "prob")[,2]
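
The heading also names AdaBoost, which the snippet above does not cover; a minimal sketch with scikit-learn's AdaBoostClassifier on the built-in iris dataset (the parameter values are illustrative):

from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
X, y = load_iris(return_X_y=True)
model = AdaBoostClassifier(n_estimators=100, random_state=0)
model.fit(X, y)
print(model.score(X, y))  #training accuracy
predicted = model.predict(X[:5])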

To view the complete guide on Machine Learning Algorithms, visit:

http://bit.ly/1DOUS8N
www.analyticsvidhya.com
