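# NOTE(review): the next four lines look like web-page residue rather than
# code; they are kept as comments so the file parses.
# Professional Documents
# Culture Documents
# Message
# Message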
import sys
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.linalg as la
from numpy import genfromtxt
def loader(fileName):
    """Read a CSV file into a pandas DataFrame.

    Args:
        fileName: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        The ``DataFrame`` returned by ``pandas.read_csv`` called with its
        default options.
    """
    return pd.read_csv(fileName)
def linea(eu1, eu2, x0, y0, x):
    """Return the y value at ``x`` of a line given by a point and a direction.

    The line passes through ``(x0, y0)`` and has direction ``(eu1, eu2)``,
    i.e. ``y = y0 + (eu2 / eu1) * (x - x0)``, evaluated here over the common
    denominator ``eu1``.

    Args:
        eu1: x component of the direction vector (the divisor).
        eu2: y component of the direction vector.
        x0: x coordinate of a point on the line.
        y0: y coordinate of a point on the line.
        x: Abscissa at which the line is evaluated.

    Returns:
        The ordinate of the line at ``x``.

    Note:
        ``eu1 == 0`` describes a vertical line; the expression then divides
        by zero and no finite result exists.
    """
    return ((eu2 * x) - (eu2 * x0) + (eu1 * y0)) / eu1
# ---------------------------------------------------------------------------
# Script body: load the 2-D data set, split it into clean points and
# outliers, show the four scatter views, print the covariance
# eigen-decomposition of the raw and cleaned data, and finally draw a line
# through the raw centroid along the first eigenvector.
# ---------------------------------------------------------------------------


def _scatter_panel(position, title, *series):
    """Draw one subplot with one scatter layer per ``(xs, ys)`` pair."""
    plt.subplot(position)
    for xs, ys in series:
        plt.scatter(xs, ys, marker='o')
    plt.xlabel('X', fontsize=16)
    plt.ylabel('Y', fontsize=16)
    plt.title(title, fontsize=10)


def _eigen_report(header, cov_caption, data):
    """Print the covariance matrix of ``data`` and its eigen-decomposition.

    ``data`` is transposed before ``np.cov`` so that each column of ``data``
    is treated as one variable.

    Returns:
        ``(covariance_matrix, eig_result)`` where ``eig_result`` is the tuple
        returned by ``scipy.linalg.eig`` (eigenvalues, eigenvectors).
    """
    print(header)
    cov = np.cov(data.T)
    print(cov_caption)
    print(cov)
    print('\n')
    eig_result = la.eig(cov)
    print('eigenvaulues: ')
    print(eig_result[0].real)
    print('\n')
    print('eigenvector: ')
    print(eig_result[1])
    print('\n')
    return cov, eig_result


k = 2.6  # value forwarded unchanged to cleaner() and getOutladers()
df = loader('2DOutliers.csv')
rawData = np.array(df)
# NOTE(review): cleaner() and getOutladers() are not defined in the visible
# part of this file; they must be defined or imported before this point.
cleanData = cleaner(df, k)
outliders = getOutladers(df, k)

for label, values in (
    ('rawdata', rawData),
    ('cleandata', cleanData),
    ('outladers', outliders),
):
    print(label)
    print(values)
print('\n')

# Column 0 is plotted on the X axis, column 1 on the Y axis.
array_xvo = outliders[:, 0]
array_yvo = outliders[:, 1]
array_xcl = cleanData[:, 0]
array_ycl = cleanData[:, 1]
# NOTE(review): these two slices originally read from `my_csv`, a name that
# is never assigned before this point (NameError). rawData is used instead on
# the assumption that `my_csv` held the same CSV contents -- confirm.
array_flavanoids = rawData[:, 0]
array_colorintensity = rawData[:, 1]

_scatter_panel(
    221,
    'DATOS CON OUTLIERS MARCADOS',
    (array_flavanoids, array_colorintensity),
    (array_xvo, array_yvo),  # second layer, drawn on top of the raw points
)
_scatter_panel(222, 'DATOS EN BRUTO', (array_flavanoids, array_colorintensity))
_scatter_panel(223, 'SOLO OUTLIERS', (array_xvo, array_yvo))
_scatter_panel(224, 'DATOS LIMPIOS SIN OUTLIERS', (array_xcl, array_ycl))
plt.show()

# rawdata
covmatRaw, resultsRaw = _eigen_report(
    'rawdata eugenvector', 'la matrix de covariansa es:', rawData
)
print('\n')

# cleandata
covmatClean, resultsClean = _eigen_report(
    'cleandata eugenvector', 'la matriz de covariansa es:', cleanData
)

# Centroids: per-column mean of the first two columns.
centroideRaw = np.array([np.mean(rawData[:, 0]), np.mean(rawData[:, 1])])
print(centroideRaw)
print('\n')
centroideClean = np.array([np.mean(cleanData[:, 0]), np.mean(cleanData[:, 1])])
print(centroideClean)

eugenvectors = np.array(resultsRaw[1])
print(eugenvectors)

x = range(-2, 8)
# scipy.linalg.eig returns eigenvectors as the COLUMNS of its second result,
# so the first eigenvector is (eugenvectors[0, 0], eugenvectors[1, 0]).
# The original indexed row 0 ([0, 0], [0, 1]), which mixes the x components
# of two different eigenvectors.
# NOTE(review): eig() does not order eigenvalues, so column 0 is not
# necessarily the principal axis -- confirm which axis is wanted.
direction_x = eugenvectors[0, 0]
direction_y = eugenvectors[1, 0]
plt.plot(
    x,
    [
        linea(direction_x, direction_y, centroideRaw[0], centroideRaw[1], i)
        for i in x
    ],
)
plt.axhline(0, color="black")
plt.axvline(0, color="black")
plt.xlim(-2, 8)
plt.ylim(-2, 8)
plt.show()