Professional Documents
Culture Documents
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm
from statsmodels.graphics.factorplots import interaction_plot
import scipy.stats as st
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import math as mt
import itertools
%matplotlib inline
Leer archivo
# Load the data set into `df`. Use ONE of the two loaders, depending on the
# format of the input file (the original line fused both with a prose "o",
# which is not valid Python).
df = pd.read_excel("nombreExcel.xlsx")
# Alternative for CSV input:
# df = pd.read_csv("nombreArchivo.csv")
df
Análisis Numérico
# Show pandas' descriptive-statistics summary of df.
df.describe()
Histograma y Boxplot
# Side-by-side histogram (left panel) and box plot (right panel) of one
# column of df.
# The figure must exist before subplots can be added to it; the original
# snippet used `fig` without ever creating it.
fig = plt.figure(figsize=(8, 4))

# Left panel: histogram of the selected column.
ax = fig.add_subplot(121)
ax.hist(df['nombreVariable'])
ax.set_xlabel('nombreVariable')
ax.set_ylabel('frecuencia')
ax.set_title('Histograma')

# Right panel: box plot. The target axes is passed explicitly so the plot
# does not depend on which axes happens to be "current" in pyplot.
ax = fig.add_subplot(122)
ax1 = df.boxplot(column=['nombreVariable'], grid=True, ax=ax)
ax.set_title('Box plot')

plt.tight_layout()
Distribución normal
# Fit an exponential distribution to one column of df (scale set to the
# sample mean) and overlay its pdf on the density-normalised histogram.
pesos = df['nombreVariable']

# Frozen distribution; later snippets in this file reuse it as `x`.
x = st.expon(scale=pesos.mean())

histogram = pesos.hist(density=True)

# Evaluate the fitted pdf over the observed range of the data. The original
# referred to a bare `nombreVariable` name that is never defined and rebuilt
# the distribution instead of reusing `x`.
vals = np.linspace(pesos.min(), pesos.max(), 1000)
histogram.plot(vals, x.pdf(vals))
Distribución gamma
Chi-2
# Chi-square goodness-of-fit test of one column of df against the frozen
# distribution `x`, using k classes that are equiprobable under `x`.
datos = df['nombreVariable']

# Number of classes: floor(1 + log2(n)), with n the count of non-null values.
k = mt.floor(1 + mt.log2(datos.count()))

# Class boundaries are the j/k quantiles of `x` for j = 0..k, so every class
# has the same expected frequency.
a = [x.ppf(j / k) for j in range(k + 1)]

frec_obs, _ = np.histogram(datos, bins=a)

# With no expected frequencies given, chisquare compares against equal
# frequencies, which matches the equiprobable classes built above.
# NOTE(review): if the parameters of `x` were estimated from this same data,
# consider passing ddof=<number of estimated parameters> — confirm.
st.chisquare(frec_obs)
Prueba KS
# Kolmogorov-Smirnov test of the column against the cdf of the frozen
# distribution `x`. The data is read from df because a bare `nombreVariable`
# name is never defined.
st.kstest(df['nombreVariable'], x.cdf)
Prueba AD
# Anderson-Darling test of the column against a named distribution family.
# Use ONE of the calls below; the original joined them with a prose "o" and
# passed the column name as a string literal instead of the data.
st.anderson(df['nombreVariable'], 'expon')
# Alternatives:
# st.anderson(df['nombreVariable'], 'norm')
# st.anderson(df['nombreVariable'], 'gumbel')

# Smallest observed value of the column.
df['nombreVariable'].min()
Q-Q Plot y P-P Plot
# Q-Q and P-P plots comparing the empirical distribution of one column of df
# with the frozen distribution `x`.

# Sample size (non-null values). The original read it from an undefined
# name `peso`.
n = df['nombreVariable'].count()

# Keep the row index as an observation id in its own column. The original
# assigned the index to the data column itself, destroying the values that
# the group-by below is meant to count.
df['Obs'] = df.index

# One row per distinct observed value, with how many times it occurs.
# Counting a single named column keeps this working when df has other columns.
dt = df.groupby('nombreVariable')['Obs'].count().to_frame('count')
dt['cum_count'] = dt['count'].cumsum()
dt['f'] = dt['cum_count'] / n              # empirical cdf at each value
dt['f_bigote'] = dt['f'] - 0.5 / n         # empirical cdf shifted down by 0.5/n
dt['f_gorro'] = x.cdf(dt.index)            # model cdf at each observed value
dt['x_M'] = x.ppf(dt['f_bigote'])          # model quantile at the shifted cdf

# The figure has to be created before panels are added to it.
fig = plt.figure(figsize=(8, 5))

# Left panel: Q-Q plot (observed values vs. model quantiles).
ax = fig.add_subplot(121)
ax.set_title('Q-Q Plot')
ax.set_xlabel('$x^s$')
ax.set_ylabel('$x^M$')
v_min = min(dt['x_M'].min(), dt.index.min())
v_max = max(dt['x_M'].max(), dt.index.max())
ax.plot([v_min, v_max], [v_min, v_max])    # 45-degree reference line
ax.plot(dt.index, dt['x_M'], 'o', ls='None', alpha=0.3)
ax.axis('equal')

# Right panel: P-P plot (shifted empirical cdf vs. model cdf).
ax = fig.add_subplot(122)
ax.set_title('P-P Plot')
# Raw strings: '\w' and '\h' are not valid escape sequences in a normal
# string literal; the resulting label text is unchanged.
ax.set_xlabel(r'$\widetilde{F}$')
ax.set_ylabel(r'$\hat{F}$')
ax.plot([0, 1], [0, 1])                    # 45-degree reference line
ax.plot(dt['f_bigote'], dt['f_gorro'], 'o', ls='None', alpha=0.3)
ax.axis('equal')

fig.subplots_adjust(top=0.8, wspace=0.3)
Largo
# Number of rows in df.
len(df)
Prueba respuesta
# Enumerate every k-element subset of `all_vars`, collect one record per
# candidate in `modelos`, then tabulate the records and sort them by their
# 'R2adj' column, highest first.
# NOTE(review): `all_vars` and `k` are not defined in this section — confirm
# where they come from.
modelos = []
subconjuntos = itertools.combinations(all_vars, k)
# NOTE(review): the body of this loop is missing from this extract. As written
# the `for` has no indented suite and nothing is ever appended to `modelos`.
# Presumably each subset was fitted and a record with an 'R2adj' key was
# appended — restore the body from the original notebook.
for sc in subconjuntos:
df_mdls = pd.DataFrame(modelos)
# sort_values returns a new frame; the sorted result is not stored here.
df_mdls.sort_values(['R2adj'], ascending=False)
# 2x2 panel of residual diagnostics for the fitted model `resultado`.
# NOTE(review): `resultado` is not defined in this section — confirm that a
# fitted model is bound to that name before this runs.

# The figure must exist before panels are added; the original used `fig`
# without creating one for this panel.
fig = plt.figure(figsize=(8, 5))

# Top-left: Pearson residuals against the 'Obs' column of df.
ax = fig.add_subplot(221)
ax.scatter(df['Obs'], resultado.resid_pearson, color='blue')
ax.set_xlabel('Observations')
ax.set_ylabel('Standard error')

# Top-right: Pearson residuals against the fitted values.
ax = fig.add_subplot(222)
ax.scatter(resultado.fittedvalues, resultado.resid_pearson, color='blue')
ax.set_xlabel('Mean response')
ax.set_ylabel('Standard error')

# Bottom-left: probability plot of the Pearson residuals.
ax = fig.add_subplot(223)
st.probplot(resultado.resid_pearson, plot=ax)

# Bottom-right: histogram of the Pearson residuals.
ax = fig.add_subplot(224)
ax.hist(resultado.resid_pearson, bins=6)
ax.set_xlabel('Standard error')
ax.set_ylabel('Frecuency')
ax.set_title('Histogram')

plt.tight_layout()
# Number of distinct values in each column of df.
df.nunique()
# The distinct values taken by the selected column.
df['nombreVariable'].unique()
Réplicas
# How many rows share each distinct value of the selected column.
df['nombreVariable'].value_counts()
Efectos
# Normal probability plot of `effects`: the ordered effects on the x-axis
# against the quantiles of a normal distribution whose mean and standard
# deviation are taken from the effects themselves.
rv = st.norm(loc=np.mean(effects), scale=np.std(effects))

# probplot returns ((theoretical quantiles, ordered data), fit); the fit
# part is not used here.
(theoretical_q, ordered_effects), _ = st.probplot(effects, dist=rv)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(1, 1, 1)
ax.scatter(ordered_effects, theoretical_q)
ax.set(
    xlabel='Effect',
    ylabel='Theorical quantiles',
    title='Normal probability plot of the effects',
)
plt.show()
Tabla Anova
# ANOVA table for the model bound to `res`.
# NOTE(review): `res` is not defined in the visible source; presumably a
# fitted statsmodels model (e.g. the result of smf.ols(...).fit()) — confirm.
anova_lm(res)
Interacción
# Interaction plot of the response column against two factor columns.
# NOTE(review): the same placeholder column "Factor" is passed as both the
# x factor and the trace factor; presumably these should be two different
# factor columns — confirm and replace with the real column names.
interaction_plot(df["Factor"],df["Factor"],df["VariablePrincipal"]);
Tukey
# Plot the simultaneous confidence intervals held by `comp`.
# NOTE(review): `comp` is not defined in the visible source; presumably it is
# the result of pairwise_tukeyhsd(...) (imported at the top of the file but
# never called here) — confirm and add the call that creates it.
comp.plot_simultaneous(figsize=(8,4));
4 Gráficos
# 2x2 panel of residual diagnostics for the fitted model `res`.
fig = plt.figure(figsize=(8, 5))

# Top-left: raw residuals against the "Factor" column of df.
ax = fig.add_subplot(2, 2, 1)
ax.scatter(df["Factor"], res.resid, color="r")
ax.set(xlabel="Observaciones", ylabel="Error Estandar")

# Top-right: raw residuals against the fitted values.
ax = fig.add_subplot(2, 2, 2)
ax.scatter(res.fittedvalues, res.resid, color="b")
ax.set(xlabel="Respuesta media", ylabel="Error Estandar")

# Bottom-left: probability plot of the Pearson residuals.
ax = fig.add_subplot(2, 2, 3)
st.probplot(res.resid_pearson, plot=ax);

# Bottom-right: histogram of the Pearson residuals.
ax = fig.add_subplot(2, 2, 4)
ax.hist(res.resid_pearson)
ax.set(xlabel="Error Estandar", ylabel="Frecuencia")

plt.tight_layout()