LMDI carbon-emission analysis notebook: factor analysis, K-means clustering,
linear/tree/neural-network regression on the lmdiplus.csv decomposition data.
# Factor analysis of the four LMDI decomposition effects: reduce them to two
# latent factors, plot per-observation scores, and print the loading matrix.
# BUG FIX: the first line was "pandas as pd" (missing `import`), which is a
# SyntaxError on a fresh kernel.
import pandas as pd
import numpy as np
from sklearn.decomposition import FactorAnalysis
import matplotlib.pyplot as plt

# Load the LMDI decomposition dataset (absolute Colab path).
data = pd.read_csv('/content/lmdiplus.csv')

# Columns fed into the factor analysis.
variables = data[['能源结构强度效应', '能源消费强度效应', '经济发展效应', '人口规模效应']]

# Fit a 2-factor model; random_state pins the iterative fit for reproducibility.
fa = FactorAnalysis(n_components=2, random_state=0)
fa.fit(variables)
factor_scores = fa.transform(variables)

# Scatter plot of the factor scores for each observation.
plt.scatter(factor_scores[:, 0], factor_scores[:, 1])
plt.xlabel('Factor 1')
plt.ylabel('Factor 2')
plt.title('Factor Scores Plot')
plt.show()

# Loadings: how strongly each original variable maps onto each factor.
factor_loadings = pd.DataFrame(fa.components_.T, index=variables.columns,
                               columns=['Factor 1', 'Factor 2'])
print(factor_loadings)
Factor 1 Factor 2
能源结构强度效应 -1.789824 14.535598
能源消费强度效应 -20.187946 -8.608929
经济发展效应 20.829704 0.697969
人口规模效应 0.318941 0.135739
# K-means (k=3) on three standardised LMDI effects, visualised as a Plotly
# 3-D scatter coloured by cluster label.
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import plotly.graph_objs as go

# Load the dataset.
data = pd.read_csv('/content/lmdiplus.csv')

# Feature columns used for clustering.
X = data[['能源结构强度效应', '能源消费强度效应', '经济发展效应']]

# Standardise so each feature contributes equally to the distance metric.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# n_init is set explicitly to silence the sklearn FutureWarning (the default
# changes to 'auto' in 1.4); random_state makes the clustering reproducible.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(X_scaled)

# Cluster assignment for each row.
labels = kmeans.labels_

# 3-D scatter of the standardised features, coloured by cluster.
fig = go.Figure(data=[go.Scatter3d(x=X_scaled[:, 0], y=X_scaled[:, 1], z=X_scaled[:, 2],
                                   mode='markers', marker=dict(size=5, color=labels))])
fig.update_layout(scene=dict(xaxis_title='能源结构强度效应', yaxis_title='能源消费强度效应',
                             zaxis_title='经济发展效应',), margin=dict(l=0, r=0, b=0, t=0))
fig.show()
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureW
The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the val
# Group the original (unscaled) rows by their K-means cluster and print each
# group.  Boolean masking on `labels` replaces the original manual
# dict-of-lists append loop; the printed frames are the same.
cluster_data = {i: X[labels == i] for i in range(kmeans.n_clusters)}

# Print every cluster's member rows.
for i in range(kmeans.n_clusters):
    print(f"Data points in cluster {i}:")
    print(cluster_data[i])
# Re-run K-means and draw one 3-D scatter trace per 3-column combination of
# the feature matrix.
# BUG FIX: the original hard-coded traces referencing X_scaled[:, 3], but
# X_scaled here was built from only THREE columns (see the earlier cell), so
# those traces raised IndexError.  The combinations are now derived from the
# actual number of columns.
from itertools import combinations

# n_init pinned (FutureWarning) and random_state fixed for reproducibility.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(X_scaled)
labels = kmeans.labels_

# One trace per 3-combination of available feature columns.
traces = [
    go.Scatter3d(x=X_scaled[:, a], y=X_scaled[:, b], z=X_scaled[:, c],
                 mode='markers', marker=dict(size=5, color=labels),
                 text=data.index)
    for a, b, c in combinations(range(X_scaled.shape[1]), 3)
]
fig = go.Figure(data=traces)
fig.update_layout(scene=dict(xaxis_title='能源结构强度效应', yaxis_title='能源消费强度效应',
                             zaxis_title='经济发展效应'), margin=dict(l=0, r=0, b=0, t=0))
fig.show()
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureW
The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the val
# K-means (k=3) on a different trio of LMDI effects (population-scale effect
# replaces the economic-development effect), plotted in 3-D.
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import plotly.graph_objs as go

# Load the dataset.
data = pd.read_csv('/content/lmdiplus.csv')

# Feature columns used for clustering.
X = data[['能源结构强度效应', '能源消费强度效应', '人口规模效应']]

# Standardise so each feature contributes equally to the distance metric.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# n_init pinned to silence the sklearn FutureWarning; random_state makes the
# cluster assignments reproducible across re-runs.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(X_scaled)
labels = kmeans.labels_

# 3-D scatter of the standardised features, coloured by cluster.
fig = go.Figure(data=[go.Scatter3d(x=X_scaled[:, 0], y=X_scaled[:, 1], z=X_scaled[:, 2],
                                   mode='markers', marker=dict(size=5, color=labels))])
fig.update_layout(scene=dict(xaxis_title='能源结构强度效应', yaxis_title='能源消费强度效应',
                             zaxis_title='人口规模效应',), margin=dict(l=0, r=0, b=0, t=0))
fig.show()
# Group the original (unscaled) rows by their K-means cluster and print each
# group.  Boolean masking on `labels` replaces the original manual
# dict-of-lists append loop; the printed frames are the same.
cluster_data = {i: X[labels == i] for i in range(kmeans.n_clusters)}

# Print every cluster's member rows.
for i in range(kmeans.n_clusters):
    print(f"Data points in cluster {i}:")
    print(cluster_data[i])
# Linear regression: predict total carbon emissions (column 0) from the four
# LMDI effect columns.
# BUG FIX: this cell used train_test_split, LinearRegression and
# mean_squared_error without any prior import in the notebook, so it failed
# under Restart & Run All; the imports are now explicit.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

data = pd.read_csv('/content/lmdiplus.csv')
X = data.iloc[:, 1:].values   # predictors: the four decomposition effects
y = data.iloc[:, 0].values    # target: total carbon emissions

# Random 80/20 split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Fitted parameters.
coefficients = regressor.coef_
intercept = regressor.intercept_
print("Coefficients:", coefficients)
print("Intercept:", intercept)
pip install lazypredict
from lazypredict.Supervised import LazyClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import pandas as pd
data = pd.read_csv('/content/lmdiplus.csv')
X = data[['能源结构强度效应', '能源消费强度效应', '经济发展效应', '人口规模效应']]
y = data['总碳排放量']
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, random_state=123)
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
print(models)
# Sanity-check lazypredict on a standard classification problem: fit every
# available classifier on the breast-cancer dataset and print the leaderboard.
from lazypredict.Supervised import LazyClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Built-in benchmark dataset (binary classification).
cancer = load_breast_cancer()
data = cancer
X, y = cancer.data, cancer.target

# 50/50 split, seeded so the leaderboard is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=123)

clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
print(models)
Time Taken
Model
LinearSVC 0.08
Perceptron 0.02
LogisticRegression 0.04
SVC 0.05
XGBClassifier 0.15
LabelPropagation 0.08
LabelSpreading 0.03
BaggingClassifier 0.34
PassiveAggressiveClassifier 0.02
SGDClassifier 0.05
RandomForestClassifier 0.34
CalibratedClassifierCV 0.07
LGBMClassifier 0.14
QuadraticDiscriminantAnalysis 0.04
ExtraTreesClassifier 0.27
RidgeClassifierCV 0.04
RidgeClassifier 0.03
AdaBoostClassifier 1.33
KNeighborsClassifier 0.07
BernoulliNB 0.05
LinearDiscriminantAnalysis 0.06
GaussianNB 0.02
NuSVC 0.03
ExtraTreeClassifier 0.02
NearestCentroid 0.02
DecisionTreeClassifier 0.06
DummyClassifier 0.05
# Ordinary least squares on an ordered 70/30 split: the first 70 % of rows
# train the model, the remaining 30 % evaluate it.
# NOTE(review): the split is positional (no shuffling) — presumably the rows
# are time-ordered; confirm against the source data.
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the LMDI dataset.
data = pd.read_csv("/content/lmdiplus.csv")

# First column is the target; the remaining columns are the predictors.
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values

# Positional split at the 70 % mark.
split = int(len(X) * 0.7)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Fit and evaluate.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("均方误差:", mse)
print("回归系数:", model.coef_)
均方误差: 1660.7356987576118
回归系数: [ 1.09242461 1.07164135 1.8201653 104.31937005]
# Correlation-matrix heat map of the LMDI dataset's columns.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset.
df = pd.read_csv('/content/lmdiplus.csv', delimiter=',')

# numeric_only=True keeps .corr() well-defined if a non-numeric column is
# ever added (and silences the deprecation path on pandas >= 1.5).
corr_matrix = df.corr(numeric_only=True)

# NOTE(review): the original run emitted "missing glyph" warnings because the
# default matplotlib font lacks CJK characters for the Chinese column names;
# configuring a CJK-capable font, e.g.
#   plt.rcParams['font.sans-serif'] = ['Noto Sans CJK SC']
# would fix the rendered labels — confirm which font is installed.
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix Heatmap')
plt.show()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
import pandas as pd
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
data = pd.read_csv('/content/lmdiplus.csv', delimiter=',')
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
print(data.head())
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
总碳排放量 能源结构强度效应 能源消费强度效应 经济发展效应 人口规模效应
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
0 fig.canvas.print_figure(bytes_io, **kw)
21.38 -0.467209 -7.895550 8.007868 0.434891
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151:
1 21.47 -1.728968 1.260215 0.100610 0.458142 UserWarn
2 fig.canvas.print_figure(bytes_io, **kw)
22.85 -1.513608 -3.521423 5.981861 0.433170
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151:
3 26.17 0.716608 -2.573258 4.707300 0.469350 UserWarn
4 fig.canvas.print_figure(bytes_io, **kw)
28.70 -1.660170 -2.432479 6.179859 0.442791
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
print(data.describe())
fig.canvas.print_figure(bytes_io, **kw)
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io,
总碳排放量 能源结构强度效应 **kw)能源消费强度效应 经济发展效应 人口规模效应
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151:
count 31.000000 31.000000 31.000000 31.000000 31.000000 UserWarn
fig.canvas.print_figure(bytes_io,
mean 125.294839 **kw)12.321744
0.647819 -4.371787 1.231580
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151:
std 97.203422 15.297685 25.456669 21.572359 0.848427 UserWarn
fig.canvas.print_figure(bytes_io,
min **kw)
21.380000 -46.004608 -87.749489 -36.103723 0.433170
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151:
25% 46.840000 -3.331625 -7.995993 3.717793 0.547552 UserWarn
fig.canvas.print_figure(bytes_io,
50% **kw) 8.007868
98.630000 -1.120762 -2.432479 0.918265
75% 167.670000 2.668930 3.636398 18.245043 1.643344
max 341.000000 50.309210 51.379382 92.186397 3.191675
import matplotlib.pyplot as plt

# Plot the energy-consumption-intensity effect against total emissions,
# using the explicit figure/axes interface.
# NOTE(review): the title says "…随时间变化" (change over time) but the x-axis
# is total emissions, not time — confirm the intended x variable.
fig, ax = plt.subplots()
ax.plot(data['总碳排放量'], data['能源消费强度效应'])
ax.set_xlabel('总碳排放量')
ax.set_ylabel('能源消费强度效应')
ax.set_title('能源消费强度效应随时间变化的趋势')
plt.show()
# Feed-forward neural network regressing total emissions on the four
# standardised LMDI effects.
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# CONSISTENCY FIX: this cell read a relative "lmdiplus.csv" while every other
# cell uses the absolute Colab path; unified so Restart & Run All works from
# any working directory.
data = pd.read_csv("/content/lmdiplus.csv")

# Split into predictors (four effects) and target (total emissions).
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values

# Standardise the predictors.
sc = StandardScaler()
X = sc.fit_transform(X)

# 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Seed NumPy and TensorFlow so the reported loss is reproducible.
np.random.seed(0)
tf.random.set_seed(0)

# Two hidden ReLU layers, linear output for regression.
model = Sequential()
model.add(Dense(units=32, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(units=16, activation='relu'))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')

# verbose=0 keeps the 100-epoch progress log from flooding the notebook.
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)

# Held-out MSE.
mse = model.evaluate(X_test, y_test, verbose=0)
print("均方误差:", mse)

# Predictions on the test split.
y_pred = model.predict(X_test)
Epoch 44/100
1/1 [==============================] - 0s 11ms/step - loss: 20587.5547
Epoch 45/100
1/1 [==============================] - 0s 12ms/step - loss: 20579.3535
Epoch 46/100
1/1 [==============================] - 0s 7ms/step - loss: 20570.9941
Epoch 47/100
1/1 [==============================] - 0s 22ms/step - loss: 20562.3379
Epoch 48/100
1/1 [==============================] - 0s 6ms/step - loss: 20553.5176
Epoch 49/100
1/1 [==============================] - 0s 6ms/step - loss: 20544.5254
Epoch 50/100
1/1 [==============================] - 0s 6ms/step - loss: 20535.3711
Epoch 51/100
1/1 [==============================] - 0s 7ms/step - loss: 20526.0801
Epoch 52/100
1/1 [==============================] - 0s 14ms/step - loss: 20516.6465
Epoch 53/100
1/1 [==============================] - 0s 8ms/step - loss: 20507.0430
Epoch 54/100
1/1 [==============================] - 0s 7ms/step - loss: 20497.2793
Epoch 55/100
1/1 [==============================] - 0s 10ms/step - loss: 20487.3340
Epoch 56/100
1/1 [==============================] - 0s 9ms/step - loss: 20477.0508
Epoch 57/100
1/1 [==============================] - 0s 9ms/step - loss: 20466.4375
Epoch 58/100
1/1 [==============================] - 0s 6ms/step - loss: 20455.6270
Epoch 59/100
1/1 [==============================] - 0s 9ms/step - loss: 20444.6191
Epoch 60/100
1/1 [==============================] - 0s 6ms/step - loss: 20433.4277
Epoch 61/100
1/1 [==============================] - 0s 11ms/step - loss: 20421.8809
Epoch 62/100
1/1 [==============================] - 0s 8ms/step - loss: 20410.0820
Epoch 63/100
1/1 [==============================] - 0s 10ms/step - loss: 20398.0020
Epoch 64/100
1/1 [==============================] - 0s 6ms/step - loss: 20385.5723
Epoch 65/100
1/1 [==============================] - 0s 5ms/step - loss: 20372.9062
Epoch 66/100
1/1 [==============================] - 0s 8ms/step - loss: 20360.0000
Epoch 67/100
1/1 [==============================] - 0s 5ms/step - loss: 20346.8418
Epoch 68/100
1/1 [==============================] - 0s 9ms/step - loss: 20333.4375
Epoch 69/100
1/1 [==============================] - 0s 8ms/step - loss: 20319.8027
Epoch 70/100
1/1 [==============================] - 0s 11ms/step - loss: 20305.9277
Epoch 71/100
1/1 [==============================] - 0s 10ms/step - loss: 20291.8184
Epoch 72/100
1/1 [==============================] - 0s 9ms/step - loss: 20277.4746
Epoch 73/100
# Decision-tree regression baseline; reports R² on a held-out 20 % split.
# Imports consolidated at the top of the cell (they were scattered mid-cell,
# which hides dependencies and breaks partial re-runs).
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

# CONSISTENCY FIX: use the same absolute path as the rest of the notebook
# (was a bare relative "lmdiplus.csv").
df = pd.read_csv('/content/lmdiplus.csv')
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values

# 80/20 split, seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X_train, y_train)

# R² of the test predictions (cell output).
y_pred = regressor.predict(X_test)
r2_score(y_test, y_pred)
0.8963126664634725