Professional Documents
Culture Documents
ipynb - Colaboratory
pip install pingouin
import pandas as pd
import numpy as np
import scipy.stats as ss
import matplotlib.pyplot as plt
import seaborn as sns
import pingouin as pg
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
from statsmodels.stats.diagnostic import het_breuschpagan
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from google.colab import drive
# Mount Google Drive before reading from it: the workbook lives under
# /content/drive/MyDrive, which only exists once Drive is mounted. On a fresh
# Colab runtime the read below would otherwise fail with FileNotFoundError.
# Calling mount again on an already-mounted runtime is harmless.
drive.mount("/content/drive")

# Load the exam dataset from the Excel workbook into a DataFrame.
data = pd.read_excel("/content/drive/MyDrive/Colab Notebooks/solemne 3/Datos_solemne_3-1.xlsx")
# Quick visual check of the first rows.
data.head()
# Discard every row that has at least one missing value so the regressions
# below are fitted on complete cases only.
data = data.dropna()
# Summarise column names, non-null counts and dtypes.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 y 25 non-null float64
1 Gravedad_crudo 25 non-null float64
2 Presión_vapor 25 non-null float64
3 Temperatura10 25 non-null int64
4 Temperatura100 25 non-null int64
dtypes: float64(3), int64(2)
memory usage: 1.1 KB
# Full model: ordinary least squares of y on the four candidate predictors.
formula_mod01 = "y~Gravedad_crudo+Presión_vapor+Temperatura10+Temperatura100"
mod01 = smf.ols(formula=formula_mod01, data=data).fit()

# Show the regression table (coefficients, standard errors, fit statistics).
print(mod01.summary())
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 7.79e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
https://colab.research.google.com/drive/1OdLoeiyvo3JGcD2BSXoil2dwa8uaBd7R#scrollTo=-IawVxQBpjwD&printMode=true 1/4
31/5/23, 21:54 Desarrollo Solemne 3 - Carola Araya D..ipynb - Colaboratory
# Reduced model: same response, with Presión_vapor left out of the formula.
formula_mod02 = "y~Gravedad_crudo+Temperatura10+Temperatura100"
mod02 = smf.ols(formula=formula_mod02, data=data).fit()

# Show the regression table for the reduced model.
print(mod02.summary())
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 7.74e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
# NOTE(review): `groupby` is not referenced anywhere in the visible notebook;
# this looks like an editor auto-import from a private pandas module and is a
# candidate for removal — TODO confirm nothing outside this view uses it.
from pandas.core.groupby import groupby
# Store the reduced model's residuals next to the data they belong to.
data["resid"]=mod02.resid
# Store the in-sample fitted values (predict() is called without new data).
data["predict"]=mod02.predict()
# Histogram of the response y with a kernel density estimate overlaid.
fig_hist, ax_hist = plt.subplots(figsize=(3, 3))
sns.histplot(data=data, x="y", kde=True, ax=ax_hist)
ax_hist.set_title("Histograma y")
plt.show()

# Activate seaborn's default theme; it applies to figures created from here on.
sns.set()

# Q-Q plot of y against the theoretical normal distribution.
fig_qq, ax_qq = plt.subplots(figsize=(3, 3))
pg.qqplot(data["y"], dist="norm", ax=ax_qq)
plt.show()
https://colab.research.google.com/drive/1OdLoeiyvo3JGcD2BSXoil2dwa8uaBd7R#scrollTo=-IawVxQBpjwD&printMode=true 2/4
31/5/23, 21:54 Desarrollo Solemne 3 - Carola Araya D..ipynb - Colaboratory
# Kolmogorov-Smirnov test of y against a normal distribution.
# kstest(..., "norm") with no extra arguments compares against the *standard*
# normal N(0, 1), so unstandardised data is rejected almost regardless of its
# shape. Pass the sample mean and standard deviation as loc/scale so the
# reference distribution matches the location and spread of y.
# NOTE: because loc/scale are estimated from the same sample, the KS p-value
# is conservative; a Lilliefors-corrected test is the stricter alternative.
y_mean = data["y"].mean()
y_std = data["y"].std(ddof=1)
ks_result = ss.kstest(data["y"], "norm", args=(y_mean, y_std))
# Print explicitly: a notebook cell only displays its last expression, so the
# KS result would otherwise be computed and silently thrown away.
print(ks_result)

# D'Agostino-Pearson omnibus normality test (skewness + kurtosis) on y.
ss.normaltest(data["y"])
NormaltestResult(statistic=1.5858242685723607, pvalue=0.452525060848027)
from statsmodels.stats.diagnostic import het_breuschpagan

# Breusch-Pagan heteroscedasticity test on the reduced model: the residuals
# are tested against the model's own design matrix.
residuals_mod02 = mod02.resid
design_matrix_mod02 = mod02.model.exog
bp_test = het_breuschpagan(residuals_mod02, design_matrix_mod02)

# Display the resulting tuple of test statistics and p-values.
bp_test
(3.987442707411526,
0.26282328717345105,
1.3283532586357687,
0.2917992311613149)
# Residuals-vs-fitted plot for the reduced model (visual companion to the
# Breusch-Pagan test): a patternless band around zero supports constant
# error variance.
# The vertical axis must be the residuals, not the raw response y: the dashed
# reference line at 0 is only meaningful for residuals, and the "resid"
# column stored on `data` earlier exists for exactly this purpose.
plt.figure(figsize=(5,5))
sns.scatterplot(data=data, x="predict", y="resid")
# Zero line: residuals should scatter symmetrically around it.
plt.axhline(y=0, color="Blue", linestyle="--")
plt.show()
https://colab.research.google.com/drive/1OdLoeiyvo3JGcD2BSXoil2dwa8uaBd7R#scrollTo=-IawVxQBpjwD&printMode=true 3/4
31/5/23, 21:54 Desarrollo Solemne 3 - Carola Araya D..ipynb - Colaboratory
https://colab.research.google.com/drive/1OdLoeiyvo3JGcD2BSXoil2dwa8uaBd7R#scrollTo=-IawVxQBpjwD&printMode=true 4/4