import csv
import random
import math
import pandas as pd
import numpy as np

# Step 1: load and split the data

def load_dataset(filename, split):
    """Read the CSV, convert the four numeric attributes to float and randomly
    split the rows; `split` is the approximate fraction kept for training."""
    training_set = []
    test_set = []
    with open(filename, 'r') as csvfile:
        lines = csv.reader(csvfile)
        dataset = list(lines)
        # len(dataset) - 1 skips a possible trailing blank line in the file
        for i in range(len(dataset) - 1):
            for j in range(4):
                dataset[i][j] = float(dataset[i][j])
            if random.random() < split:
                training_set.append(dataset[i])
            else:
                test_set.append(dataset[i])
    return training_set, test_set
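
# A minimal sanity check for load_dataset (an illustrative sketch; it assumes
# 'color_iris.csv' has the layout the loop above expects: four numeric feature
# columns followed by a class label). Defining it has no side effects; call it
# by hand to confirm what fraction of the rows ends up in each set.
def _check_split(filename='color_iris.csv', split=0.8):
    train, test = load_dataset(filename, split)
    total = len(train) + len(test)
    print(f"{len(train)} training rows, {len(test)} test rows "
          f"({len(train) / total:.0%} of the data used for training)")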

# Step 2: compute the Euclidean distance

def euclidean_distance(instance1, instance2, length):
    """Euclidean distance over the first `length` attributes of two instances."""
    distance = 0
    for i in range(length):
        distance += pow((instance1[i] - instance2[i]), 2)
    return math.sqrt(distance)
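
# Worked example (illustrative values): a 3-4-5 right triangle over the first two
# attributes; the third element stands in for a class label and is ignored because
# length is 2.
assert euclidean_distance([3.0, 0.0, 'a'], [0.0, 4.0, 'b'], 2) == 5.0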

# Step 3: implement the KNN algorithm

def knn(training_set, test_instance, k):
    """Return the majority class among the k training instances nearest to test_instance."""
    distances = []
    length = len(test_instance) - 1  # the last position holds the class label
    for i in range(len(training_set)):
        dist = euclidean_distance(test_instance, training_set[i], length)
        distances.append((training_set[i], dist))
    distances.sort(key=lambda x: x[1])
    neighbors = []
    for i in range(k):
        neighbors.append(distances[i][0])
    # majority vote over the neighbours' class labels
    votes = {}
    for i in range(len(neighbors)):
        response = neighbors[i][-1]
        if response in votes:
            votes[response] += 1
        else:
            votes[response] = 1
    sorted_votes = sorted(votes.items(), key=lambda x: x[1], reverse=True)
    return sorted_votes[0][0]
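
# Small usage sketch for knn(): classify one made-up measurement (hypothetical
# feature values; the trailing placeholder occupies the label slot, which knn
# ignores when computing distances). Defining the helper runs nothing by itself.
def _demo_knn(training_set, k=3):
    sample = [5.1, 3.5, 1.4, 0.2, 'unknown']  # hypothetical sepal/petal values
    return knn(training_set, sample, k)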

# Step 4: evaluate the algorithm's performance

def evaluate_algorithm(training_set, test_set, k):
    """Count correct and wrong predictions per class over the whole test set."""
    correct_predictions = {'Iris-setosa': 0,
                           'Iris-versicolor': 0, 'Iris-virginica': 0}
    wrong_predictions = {'Iris-setosa': 0,
                         'Iris-versicolor': 0, 'Iris-virginica': 0}
    for i in range(len(test_set)):
        prediction = knn(training_set, test_set[i], k)
        if prediction == test_set[i][-1]:
            correct_predictions[prediction] += 1
        else:
            wrong_predictions[test_set[i][-1]] += 1
    return correct_predictions, wrong_predictions
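
# A small companion sketch: per-class accuracy from the two dictionaries returned
# by evaluate_algorithm (added here only for illustration; the original reporting
# below works on the totals instead).
def per_class_accuracy(correct_predictions, wrong_predictions):
    return {
        species: correct_predictions[species] /
        max(correct_predictions[species] + wrong_predictions[species], 1)
        for species in correct_predictions
    }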

# Step 5: report the results

def report_results(correct_predictions, wrong_predictions):
    """Print the per-class counts and return the overall accuracy."""
    print("Correct predictions:")
    print(correct_predictions)
    print("Wrong predictions:")
    print(wrong_predictions)
    total_correct = sum(correct_predictions.values())
    total_wrong = sum(wrong_predictions.values())
    accuracy = total_correct / (total_correct + total_wrong)
    print("Total accuracy:", accuracy)
    return accuracy

k_values = [3, 5, 7, 9, 11]

best_k = 0

# Run the experiment: 80% of the data for training
print("for the 80% split")
filename = 'color_iris.csv'
split = 0.2
training_set, test_set = load_dataset(filename, split)
kbest_80 = 0
kaccuracy80 = 0.01

for i in k_values:
    print("For k =", i)
    # re-split the data on every iteration, keeping 1 - split = 80% for training
    training_set, test_set = load_dataset(filename, 1 - split)
    correct_predictions, wrong_predictions = evaluate_algorithm(
        training_set, test_set, i)
    valor = report_results(correct_predictions, wrong_predictions)
    if valor >= kaccuracy80:
        kbest_80 = i
        kaccuracy80 = valor
print("The best k for the 80% split was:", kbest_80)
# (the same sweep is repeated below for 50% and 75%; a consolidated sketch follows the 75% run)

# Ejecutar el código
print("para el 50%")
filename = 'color_iris.csv'
split = 0.5
training_set, test_set = load_dataset(filename, split)
kbest_50 = 0
kaccuracy50 = 0.01
for i in k_values:
print("Para k= ", i)
# utilizar el conjunto completo para la evaluación final
training_set, test_set = load_dataset(filename, 1 - split)
correct_predictions, wrong_predictions = evaluate_algorithm(
training_set, test_set, i)
valor = report_results(correct_predictions, wrong_predictions)
if valor >= kaccuracy50:
kbest_50 = i
kaccuracy50 = valor
print("El mejor k para 50% fue de: ", kbest_50)

# Run the experiment: 75% of the data for training
print("for the 75% split")
filename = 'color_iris.csv'
split = 0.25
training_set, test_set = load_dataset(filename, split)
kbest_75 = 0
kaccuracy75 = 0.01

for i in k_values:
    print("For k =", i)
    # re-split the data on every iteration, keeping 1 - split = 75% for training
    training_set, test_set = load_dataset(filename, 1 - split)
    correct_predictions, wrong_predictions = evaluate_algorithm(
        training_set, test_set, i)
    valor = report_results(correct_predictions, wrong_predictions)
    if valor >= kaccuracy75:
        kbest_75 = i
        kaccuracy75 = valor
print("The best k for the 75% split was:", kbest_75)

data = pd.read_csv("Iris.csv")

# Separate the data by species
setosa = data.loc[data['Species'] == 'Iris-setosa']
versicolor = data.loc[data['Species'] == 'Iris-versicolor']
virginica = data.loc[data['Species'] == 'Iris-virginica']

# Select 10 random flowers from each species
np.random.seed(42)
setosa_sample = setosa.sample(n=10, random_state=1)
versicolor_sample = versicolor.sample(n=10, random_state=1)
virginica_sample = virginica.sample(n=10, random_state=1)

# Compute the Euclidean distance using the first and fourth columns

def euclidean_distance(x1, x2):
    """NumPy version; note that this redefines the earlier three-argument function."""
    return np.sqrt(np.sum((x1 - x2) ** 2))
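
# Worked example (illustrative): the same 3-4-5 check as before, now with the
# two-argument NumPy version defined above.
assert euclidean_distance(np.array([3.0, 0.0]), np.array([0.0, 4.0])) == 5.0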

setosa_distance = []
versicolor_distance = []
virginica_distance = []

for i in range(len(data)):
    if data.iloc[i]['Species'] == 'Iris-setosa':
        distance = euclidean_distance(data.iloc[i, [0, 3]],
                                      setosa_sample.mean(numeric_only=True).iloc[[0, 3]])
        setosa_distance.append(distance)
    elif data.iloc[i]['Species'] == 'Iris-versicolor':
        distance = euclidean_distance(data.iloc[i, [0, 3]],
                                      versicolor_sample.mean(numeric_only=True).iloc[[0, 3]])
        versicolor_distance.append(distance)
    else:
        distance = euclidean_distance(data.iloc[i, [0, 3]],
                                      virginica_sample.mean(numeric_only=True).iloc[[0, 3]])
        virginica_distance.append(distance)
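
# A vectorized sketch of the same per-species distance computation (it assumes the
# same column layout as the loop above: the features of interest sit at positions
# 0 and 3). It is not used below; it only shows the idiomatic pandas/NumPy form of
# the explicit loop.
def species_distances(df, sample, species):
    rows = df.loc[df['Species'] == species]
    centroid = sample.mean(numeric_only=True).iloc[[0, 3]]
    feats = rows.iloc[:, [0, 3]]
    return np.sqrt(((feats - centroid.values) ** 2).sum(axis=1)).tolist()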

# Compute sensitivity, specificity and precision values

setosa_tp = len([x for x in setosa_distance
                 if x <= np.max(versicolor_distance + virginica_distance)])
setosa_fn = len([x for x in setosa_distance
                 if x > np.max(versicolor_distance + virginica_distance)])
setosa_fp = len(versicolor_distance + virginica_distance)

versicolor_tp = len([x for x in versicolor_distance
                     if x <= np.max(setosa_distance + virginica_distance)])
versicolor_fn = len([x for x in versicolor_distance
                     if x > np.max(setosa_distance + virginica_distance)])
versicolor_fp = len(setosa_distance + virginica_distance)

virginica_tp = len([x for x in virginica_distance
                    if x <= np.max(setosa_distance + versicolor_distance)])
virginica_fn = len([x for x in virginica_distance
                    if x > np.max(setosa_distance + versicolor_distance)])
virginica_fp = len(setosa_distance + versicolor_distance)

setosa_precision = setosa_tp / (setosa_tp + setosa_fp)
versicolor_precision = versicolor_tp / (versicolor_tp + versicolor_fp)
virginica_precision = virginica_tp / (virginica_tp + virginica_fp)

setosa_sensitivity = setosa_tp / (setosa_tp + setosa_fn)
versicolor_sensitivity = versicolor_tp / (versicolor_tp + versicolor_fn)
virginica_sensitivity = virginica_tp / (virginica_tp + virginica_fn)

# Note: the three values below reuse the precision formula TP / (TP + FP);
# a sketch of specificity in the TN / (TN + FP) sense follows the prints.
setosa_specificity = setosa_tp / (setosa_tp + setosa_fp)
versicolor_specificity = versicolor_tp / (versicolor_tp + versicolor_fp)
virginica_specificity = virginica_tp / (virginica_tp + virginica_fp)

print('Setosa specificity: ', setosa_specificity, ' Setosa sensitivity: ',
      setosa_sensitivity, ' Setosa precision ', setosa_precision)
print('Virginica specificity: ', virginica_specificity, ' Virginica sensitivity: ',
      virginica_sensitivity, ' Virginica precision ', virginica_precision)
print('Versicolor specificity: ', versicolor_specificity, ' Versicolor sensitivity: ',
      versicolor_sensitivity, ' Versicolor precision ', versicolor_precision)
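
# Note on the metrics above: the *_specificity values reuse the precision formula
# TP / (TP + FP). A sketch of specificity in the usual sense, TN / (TN + FP), is
# given below; the true-negative count is left as an input because the threshold
# scheme above does not produce one directly.
def specificity(tn, fp):
    """Specificity = TN / (TN + FP); returns 0.0 when the denominator is zero."""
    return tn / (tn + fp) if (tn + fp) else 0.0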
