Professional Documents
Culture Documents
Metode Klasifikasi
Metode Klasifikasi
1. Logistic Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Build the working DataFrame from the sales dictionary.
# NOTE(review): `data` is not defined in this fragment; a `data` dict appears
# further down in this document - confirm it is the intended input.
df = pd.DataFrame(data)

# Derive the target label from Total_Penjualan. pd.cut uses right-closed bins
# by default, so the effective rule is:
#   Total_Penjualan <= 500          -> 'Tidak Laris'
#   500 < Total_Penjualan <= 1000   -> 'Sedang'
#   Total_Penjualan > 1000          -> 'Laris'
# NOTE(review): the original comment described 500 itself as 'Sedang'; the
# code puts exactly 500 in 'Tidak Laris' - confirm which boundary is intended.
df['Label'] = pd.cut(
    df['Total_Penjualan'],
    bins=[-float('inf'), 500, 1000, float('inf')],
    labels=['Tidak Laris', 'Sedang', 'Laris'],
)

# NOTE(review): no figure is created in this fragment; the model-fitting and
# plotting steps appear to be missing from the excerpt.
plt.show()
2. K-Means
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Build the working DataFrame from the sales dictionary.
# NOTE(review): `data` is not defined in this fragment; a `data` dict appears
# further down in this document - confirm it is the intended input. The
# clustering steps themselves appear to be missing from the excerpt.
df = pd.DataFrame(data)
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Build the working DataFrame from the sales dictionary.
# NOTE(review): `data` is not defined in this fragment; a `data` dict appears
# further down in this document - confirm it is the intended input.
df = pd.DataFrame(data)

# Derive the target label from Total_Penjualan. pd.cut uses right-closed bins
# by default, so the effective rule is:
#   Total_Penjualan <= 500          -> 'Tidak Laris'
#   500 < Total_Penjualan <= 1000   -> 'Sedang'
#   Total_Penjualan > 1000          -> 'Laris'
# NOTE(review): the original comment described 500 itself as 'Sedang'; the
# code puts exactly 500 in 'Tidak Laris' - confirm which boundary is intended.
df['Label'] = pd.cut(
    df['Total_Penjualan'],
    bins=[-float('inf'), 500, 1000, float('inf')],
    labels=['Tidak Laris', 'Sedang', 'Laris'],
)

# Split the data into training and testing sets (20% held out, fixed seed so
# the split is reproducible).
X = df[['Jumlah_Pengunjung', 'Durasi_Pengunjung']]
y = df['Label']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# NOTE(review): no classifier is fitted and no figure is created in this
# fragment; those steps appear to be missing from the excerpt.
plt.show()
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Build a simple e-commerce sales dataset: visitor count, visit duration and
# total sales, one entry per observation.
data = {
    'Jumlah_Pengunjung': [150, 200, 300, 120, 180, 250, 140, 280, 200, 320],
    'Durasi_Pengunjung': [2, 3, 4, 1, 2, 3, 1.5, 4, 3, 5],
    'Total_Penjualan': [500, 800, 1200, 300, 600, 1000, 400, 1100, 900, 1300],
}
df = pd.DataFrame(data)

# Derive the target label from Total_Penjualan. pd.cut uses right-closed bins
# by default, so the effective rule is:
#   Total_Penjualan <= 500          -> 'Tidak Laris'
#   500 < Total_Penjualan <= 1000   -> 'Sedang'
#   Total_Penjualan > 1000          -> 'Laris'
# NOTE(review): the original comment described 500 itself as 'Sedang'; the
# code puts exactly 500 in 'Tidak Laris' - confirm which boundary is intended.
df['Label'] = pd.cut(
    df['Total_Penjualan'],
    bins=[-float('inf'), 500, 1000, float('inf')],
    labels=['Tidak Laris', 'Sedang', 'Laris'],
)
5. Decision Trees
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Build the working DataFrame from the sales dictionary.
# NOTE(review): `data` is not defined in this fragment; a `data` dict appears
# earlier in this document - confirm it is the intended input.
df = pd.DataFrame(data)

# Derive the target label from Total_Penjualan. pd.cut uses right-closed bins
# by default, so the effective rule is:
#   Total_Penjualan <= 500          -> 'Tidak Laris'
#   500 < Total_Penjualan <= 1000   -> 'Sedang'
#   Total_Penjualan > 1000          -> 'Laris'
# NOTE(review): the original comment described 500 itself as 'Sedang'; the
# code puts exactly 500 in 'Tidak Laris' - confirm which boundary is intended.
df['Label'] = pd.cut(
    df['Total_Penjualan'],
    bins=[-float('inf'), 500, 1000, float('inf')],
    labels=['Tidak Laris', 'Sedang', 'Laris'],
)
6. Random Forest
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Build the working DataFrame from the sales dictionary.
# NOTE(review): `data` is not defined in this fragment; a `data` dict appears
# earlier in this document - confirm it is the intended input.
df = pd.DataFrame(data)

# Derive the target label from Total_Penjualan. pd.cut uses right-closed bins
# by default, so the effective rule is:
#   Total_Penjualan <= 500          -> 'Tidak Laris'
#   500 < Total_Penjualan <= 1000   -> 'Sedang'
#   Total_Penjualan > 1000          -> 'Laris'
# NOTE(review): the original comment described 500 itself as 'Sedang'; the
# code puts exactly 500 in 'Tidak Laris' - confirm which boundary is intended.
df['Label'] = pd.cut(
    df['Total_Penjualan'],
    bins=[-float('inf'), 500, 1000, float('inf')],
    labels=['Tidak Laris', 'Sedang', 'Laris'],
)
7. Naive Bayes
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Build the working DataFrame from the sales dictionary.
# NOTE(review): `data` is not defined in this fragment; a `data` dict appears
# earlier in this document - confirm it is the intended input.
df = pd.DataFrame(data)

# Derive the target label from Total_Penjualan. pd.cut uses right-closed bins
# by default, so the effective rule is:
#   Total_Penjualan <= 500          -> 'Tidak Laris'
#   500 < Total_Penjualan <= 1000   -> 'Sedang'
#   Total_Penjualan > 1000          -> 'Laris'
# NOTE(review): the original comment described 500 itself as 'Sedang'; the
# code puts exactly 500 in 'Tidak Laris' - confirm which boundary is intended.
df['Label'] = pd.cut(
    df['Total_Penjualan'],
    bins=[-float('inf'), 500, 1000, float('inf')],
    labels=['Tidak Laris', 'Sedang', 'Laris'],
)

# NOTE(review): no figure is created in this fragment; the model-fitting and
# plotting steps appear to be missing from the excerpt.
plt.show()
8. Neural Networks
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Build the working DataFrame from the sales dictionary.
# NOTE(review): `data` is not defined in this fragment; a `data` dict appears
# earlier in this document - confirm it is the intended input.
df = pd.DataFrame(data)

# Derive the target label from Total_Penjualan. pd.cut uses right-closed bins
# by default, so the effective rule is:
#   Total_Penjualan <= 500          -> 'Tidak Laris'
#   500 < Total_Penjualan <= 1000   -> 'Sedang'
#   Total_Penjualan > 1000          -> 'Laris'
# NOTE(review): the original comment described 500 itself as 'Sedang'; the
# code puts exactly 500 in 'Tidak Laris' - confirm which boundary is intended.
df['Label'] = pd.cut(
    df['Total_Penjualan'],
    bins=[-float('inf'), 500, 1000, float('inf')],
    labels=['Tidak Laris', 'Sedang', 'Laris'],
)

# NOTE(review): no figure is created in this fragment; the model-fitting and
# plotting steps appear to be missing from the excerpt.
plt.show()
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
# Build the working DataFrame from the sales dictionary.
# NOTE(review): `data` is not defined in this fragment; a `data` dict appears
# earlier in this document - confirm it is the intended input.
df = pd.DataFrame(data)

# Derive the target label from Total_Penjualan. pd.cut uses right-closed bins
# by default, so the effective rule is:
#   Total_Penjualan <= 500          -> 'Tidak Laris'
#   500 < Total_Penjualan <= 1000   -> 'Sedang'
#   Total_Penjualan > 1000          -> 'Laris'
# NOTE(review): the original comment described 500 itself as 'Sedang'; the
# code puts exactly 500 in 'Tidak Laris' - confirm which boundary is intended.
df['Label'] = pd.cut(
    df['Total_Penjualan'],
    bins=[-float('inf'), 500, 1000, float('inf')],
    labels=['Tidak Laris', 'Sedang', 'Laris'],
)

# Split the data into training and testing sets (20% held out, fixed seed so
# the split is reproducible).
X = df[['Jumlah_Pengunjung', 'Durasi_Pengunjung']]
y = df['Label']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# NOTE(review): `y_train_pred`, `y_test_pred` and `model_name` are not defined
# anywhere in this excerpt - presumably they come from a model-fitting loop
# that was lost in extraction. The code below fails with NameError until that
# step is restored.

# Training analysis: accuracy of the predictions on the training split.
train_accuracy = accuracy_score(y_train, y_train_pred)
print(f"Training Accuracy: {train_accuracy:.2f}")

# Testing analysis: accuracy of the predictions on the held-out split.
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Testing Accuracy: {test_accuracy:.2f}")

# Left panel: training samples in feature space, coloured by true label.
plt.subplot(1, 2, 1)
sns.scatterplot(
    x='Jumlah_Pengunjung',
    y='Durasi_Pengunjung',
    hue='Label',
    data=pd.concat([X_train, y_train], axis=1),
)
plt.title(f'Data Training - {model_name}')

# Right panel: testing samples in feature space, coloured by true label.
plt.subplot(1, 2, 2)
sns.scatterplot(
    x='Jumlah_Pengunjung',
    y='Durasi_Pengunjung',
    hue='Label',
    data=pd.concat([X_test, y_test], axis=1),
)
plt.title(f'Data Testing - {model_name}')
plt.show()
Logistic Regression
K-Means
K-Nearest Neighbors (K-NN)
Support Vector Machines (SVM)
Decision Trees
Random Forest
Naive Bayes
Neural Networks
3. Membuat PPT: pilihlah salah satu di antara metode tersebut, lalu jelaskan penggunaannya, rumusnya, dan
parameternya. Bisa menggunakan bantuan ChatGPT.