9

import
pandas as pd
import numpy as np
from sklearn.decomposition import FactorAnalysis
import matplotlib.pyplot as plt
# 读取数据
data = pd.read_csv('/content/lmdiplus.csv')
# 筛选需要进行因子分析的变量
variables = data[['能源结构强度效应', '能源消费强度效应', '经济发展效应', '人口规模效应']]
# 使用因子分析进行降维
fa = FactorAnalysis(n_components=2)
fa.fit(variables)
factor_scores = fa.transform(variables)
# 绘制因子得分图
plt.scatter(factor_scores[:, 0], factor_scores[:, 1])
plt.xlabel('Factor 1')
plt.ylabel('Factor 2')
plt.title('Factor Scores Plot')
plt.show()
# 输出因子载荷矩阵
factor_loadings = pd.DataFrame(fa.components_.T, index=variables.columns, columns=['Factor 1', 'Factor 2'])
print(factor_loadings)
Factor 1 Factor 2
能源结构强度效应 -1.789824 14.535598
能源消费强度效应 -20.187946 -8.608929
经济发展效应 20.829704 0.697969
人口规模效应 0.318941 0.135739
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import plotly.graph_objs as go
# 加载数据集
# 提取用于聚类的数据特征列
X = data[['能源结构强度效应', '能源消费强度效应', '经济发展效应']]
# 数据标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# 创建KMeans模型
kmeans = KMeans(n_clusters=3)
# 进行聚类
kmeans.fit(X_scaled)
# 获取聚类结果
labels = kmeans.labels_
# 使用 Plotly 绘制四维散点图
fig = go.Figure(data=[go.Scatter3d(x=X_scaled[:, 0], y=X_scaled[:, 1], z=X_scaled[:, 2],
mode='markers', marker=dict(size=5, color=labels))])
fig.update_layout(scene=dict(xaxis_title='能源结构强度效应', yaxis_title='能源消费强度效应',
zaxis_title='经济发展效应',), margin=dict(l=0, r=0, b=0, t=0))
fig.show()
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureW
The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the val
# 创建一个字典来存储每个集群的数据点
cluster_data = {}
for i in range(kmeans.n_clusters):
cluster_data[i] = []
# 遍历每个数据点并将其附加到相应的集群
for j, label in enumerate(labels):
cluster_data[label].append(X.iloc[j])
# 打印每个集群的数据点
print(f"Data points in cluster {i}:")
print(pd.DataFrame(cluster_data[i]))
Data points in cluster 0:

能源结构强度效应能源消费强度效应经济发展效应
15 50.309210 -60.013775 12.726301
22 -1.120762 -38.459389 45.092080
24 7.923414 -24.543144 47.999139
30 -10.341841 -87.749489 92.186397
16 -46.004608 32.423473 14.374728
23 15.498906 39.830448 -24.677972
25 -8.687766 51.379382 -36.103723
0 -0.467209 -7.895550 8.007868
1 -1.728968 1.260215 0.100610
2 -1.513608 -3.521423 5.981861
3 0.716608 -2.573258 4.707300
4 -1.660170 -2.432479 6.179859
5 1.135795 -1.059513 4.935041
6 4.816562 2.167352 2.815126
7 -1.383597 3.035454 0.029918
8 -1.419268 -0.951772 1.934357
9 -0.726590 2.301715 3.571983
10 0.157858 4.237341 2.096379
11 3.041756 1.313583 3.863602
12 2.296103 -3.444902 7.993453
13 -4.637711 5.005593 10.531246
14 1.361179 -16.106803 20.930855
17 -9.565833 -3.850666 25.391493
18 7.295936 -5.130475 6.715656
19 -3.577447 7.024731 10.357393
20 -1.892200 -8.096436 21.155065
21 -13.869377 -13.596461 19.117341
26 -9.779991 -3.092230 17.372745
27 12.070560 5.961497 20.751664
28 34.921237 11.663567 17.003521
29 -3.085802 -20.612001 8.832763
# 进行聚类
mode='markers', marker=dict(size=5, color=labels),
text=data.index),
go.Scatter3d(x=X_scaled[:, 0], y=X_scaled[:, 1], z=X_scaled[:, 3],
text=data.index),
text=data.index),
text=data.index)])
zaxis_title='经济发展效应'), margin=dict(l=0, r=0, b=0, t=0))
fig.show()
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureW
The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the val
import pandas as pd
from sklearn.cluster import KMeans
import plotly.graph_objs as go
# 加载数据集
# 提取用于聚类的数据特征列
X = data[['能源结构强度效应', '能源消费强度效应', '人口规模效应',]]
# 数据标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# 进行聚类
mode='markers', marker=dict(size=5, color=labels))])
zaxis_title='人口规模效应',), margin=dict(l=0, r=0, b=0, t=0))
fig.show()
.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default val
# 创建一个字典来存储每个集群的数据点
cluster_data = {}
cluster_data[i] = []
# 遍历每个数据点并将其附加到相应的集群
for j, label in enumerate(labels):
cluster_data[label].append(X.iloc[j])
# 打印每个集群的数据点
print(f"Data points in cluster {i}:")
print(pd.DataFrame(cluster_data[i]))

能源结构强度效应能源消费强度效应人口规模效应
0 -0.467209 -7.895550 0.434891
1 -1.728968 1.260215 0.458142
2 -1.513608 -3.521423 0.433170
3 0.716608 -2.573258 0.469350
4 -1.660170 -2.432479 0.442791
5 1.135795 -1.059513 0.498676
6 4.816562 2.167352 0.510959
7 -1.383597 3.035454 0.588225
8 -1.419268 -0.951772 0.536683
9 -0.726590 2.301715 0.562892
10 0.157858 4.237341 0.558422
11 3.041756 1.313583 0.631058
12 2.296103 -3.444902 0.705346
13 -4.637711 5.005593 0.790872
14 1.361179 -16.106803 0.764769
16 -46.004608 32.423473 0.936407
17 -9.565833 -3.850666 0.995006
18 7.295936 -5.130475 1.228883
19 -3.577447 7.024731 1.345323
20 -1.892200 -8.096436 1.473571
21 -13.869377 -13.596461 1.488497
15 50.309210 -60.013775 0.918265
22 -1.120762 -38.459389 1.638070
24 7.923414 -24.543144 2.170591
26 -9.779991 -3.092230 2.439476
29 -3.085802 -20.612001 2.765040
30 -10.341841 -87.749489 3.014933
23 15.498906 39.830448 1.648618
25 -8.687766 51.379382 2.142107
27 12.070560 5.961497 2.396279
28 34.921237 11.663567 3.191675
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
regressor = LinearRegression()
regressor.fit(X_train, y_train)
▾ LinearRegression
LinearRegression()
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)
Mean Squared Error: 644.1467604355554
coefficients = regressor.coef_
intercept = regressor.intercept_
print("Coefficients:", coefficients)
print("Intercept:", intercept)
Coefficients: [ 0.34144153 0.3274244 0.51924317 103.84242724]

Intercept: -10.41608924412212
pip install lazypredict
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/

Collecting lazypredict
Downloading lazypredict-0.2.12-py2.py3-none-any.whl (12 kB)
Requirement already satisfied: click in /usr/local/lib/python3.9/dist-packages (from lazypredict) (8.1.3)
Requirement already satisfied: joblib in /usr/local/lib/python3.9/dist-packages (from lazypredict) (1.2.0)
Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (from lazypredict) (1.5.3)
Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from lazypredict) (4.65.0)
Requirement already satisfied: xgboost in /usr/local/lib/python3.9/dist-packages (from lazypredict) (1.7.5)
Requirement already satisfied: lightgbm in /usr/local/lib/python3.9/dist-packages (from lazypredict) (3.3.5)
Requirement already satisfied: scikit-learn in /usr/local/lib/python3.9/dist-packages (from lazypredict) (1.2.2)
Requirement already satisfied: wheel in /usr/local/lib/python3.9/dist-packages (from lightgbm->lazypredict) (0.40.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (from lightgbm->lazypredict) (1.22.4)
Requirement already satisfied: scipy in /usr/local/lib/python3.9/dist-packages (from lightgbm->lazypredict) (1.10.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.9/dist-packages (from scikit-learn->lazypredict) (3.1.0)
Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas->lazypredict) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas->lazypredict) (2022.7.1)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas->lazypredict) (1.16.0
Installing collected packages: lazypredict
Successfully installed lazypredict-0.2.12
from lazypredict.Supervised import LazyClassifier
from sklearn.datasets import load_breast_cancer
import pandas as pd
X = data[['能源结构强度效应', '能源消费强度效应', '经济发展效应', '人口规模效应']]
y = data['总碳排放量']
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, random_state=123)
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
print(models)
100%|██████████| 29/29 [00:00<00:00, 82.13it/s]Empty DataFrame

Columns: [Accuracy, Balanced Accuracy, ROC AUC, F1 Score, Time Taken]
Index: []
from lazypredict.Supervised import LazyClassifier
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, random_state=123)
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
print(models)
LinearSVC 0.99 0.99 0.99 0.99

Perceptron 0.99 0.98 0.98 0.99
LogisticRegression 0.99 0.98 0.98 0.99
SVC 0.98 0.98 0.98 0.98
XGBClassifier 0.98 0.98 0.98 0.98
LabelPropagation 0.98 0.97 0.97 0.98
LabelSpreading 0.98 0.97 0.97 0.98
BaggingClassifier 0.97 0.97 0.97 0.97
PassiveAggressiveClassifier 0.98 0.97 0.97 0.98
SGDClassifier 0.98 0.97 0.97 0.98
RandomForestClassifier 0.97 0.97 0.97 0.97
CalibratedClassifierCV 0.98 0.97 0.97 0.98
LGBMClassifier 0.97 0.97 0.97 0.97
QuadraticDiscriminantAnalysis 0.96 0.97 0.97 0.97
ExtraTreesClassifier 0.97 0.96 0.96 0.97
RidgeClassifierCV 0.97 0.96 0.96 0.97
RidgeClassifier 0.97 0.96 0.96 0.97
AdaBoostClassifier 0.96 0.96 0.96 0.96
KNeighborsClassifier 0.96 0.96 0.96 0.96
BernoulliNB 0.95 0.95 0.95 0.95
LinearDiscriminantAnalysis 0.96 0.95 0.95 0.96
GaussianNB 0.95 0.95 0.95 0.95
NuSVC 0.95 0.94 0.94 0.95
ExtraTreeClassifier 0.94 0.93 0.93 0.94
NearestCentroid 0.95 0.93 0.93 0.95
DecisionTreeClassifier 0.93 0.93 0.93 0.93
DummyClassifier 0.64 0.50 0.50 0.50
Time Taken
Model
LinearSVC 0.08
Perceptron 0.02
LogisticRegression 0.04
SVC 0.05
XGBClassifier 0.15
LabelPropagation 0.08
LabelSpreading 0.03
BaggingClassifier 0.34
PassiveAggressiveClassifier 0.02
SGDClassifier 0.05
RandomForestClassifier 0.34
CalibratedClassifierCV 0.07
LGBMClassifier 0.14
QuadraticDiscriminantAnalysis 0.04
ExtraTreesClassifier 0.27
RidgeClassifierCV 0.04
RidgeClassifier 0.03
AdaBoostClassifier 1.33
KNeighborsClassifier 0.07
BernoulliNB 0.05
LinearDiscriminantAnalysis 0.06
GaussianNB 0.02
NuSVC 0.03
ExtraTreeClassifier 0.02
NearestCentroid 0.02
DecisionTreeClassifier 0.06
DummyClassifier 0.05
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# 读取数据
data = pd.read_csv("/content/lmdiplus.csv")
# 将数据分为自变量和因变量
X = data.iloc[:,1:].values
y = data.iloc[:,0].values
# 划分数据集
train_size = int(len(X) * 0.7)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
# 训练模型
model = LinearRegression()
model.fit(X_train, y_train)
# 预测结果
y_pred = model.predict(X_test)
# 计算均方误差
mse = mean_squared_error(y_test, y_pred)
print("均方误差：", mse)
# 输出回归系数
print("回归系数：", model.coef_)
均方误差： 1660.7356987576118
回归系数： [ 1.09242461 1.07164135 1.8201653 104.31937005]
import pandas as pd
import numpy as np
import seaborn as sns
# 读取数据集
df = pd.read_csv('/content/lmdiplus.csv', delimiter=',')
# 计算相关系数矩阵
corr_matrix = df.corr()
# 绘制相关性矩阵热力图
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix Heatmap')
plt.show()
/usr/local/lib/python3.9/dist packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/seaborn/utils.py:80: UserWarning: Glyph
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
fig.canvas.draw()
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151: UserWarn
fig.canvas.print_figure(bytes_io, **kw)
import pandas as pd
data = pd.read_csv('/content/lmdiplus.csv', delimiter=',')
print(data.head())
总碳排放量能源结构强度效应能源消费强度效应经济发展效应人口规模效应
0 fig.canvas.print_figure(bytes_io, **kw)
21.38 -0.467209 -7.895550 8.007868 0.434891
/usr/local/lib/python3.9/dist-packages/IPython/core/pylabtools.py:151:
1 21.47 -1.728968 1.260215 0.100610 0.458142 UserWarn
22.85 -1.513608 -3.521423 5.981861 0.433170
3 26.17 0.716608 -2.573258 4.707300 0.469350 UserWarn
28.70 -1.660170 -2.432479 6.179859 0.442791
print(data.describe())
fig.canvas.print_figure(bytes_io,
总碳排放量能源结构强度效应 **kw)能源消费强度效应经济发展效应人口规模效应
count 31.000000 31.000000 31.000000 31.000000 31.000000 UserWarn
mean 125.294839 **kw)12.321744
0.647819 -4.371787 1.231580
std 97.203422 15.297685 25.456669 21.572359 0.848427 UserWarn
min **kw)
21.380000 -46.004608 -87.749489 -36.103723 0.433170
25% 46.840000 -3.331625 -7.995993 3.717793 0.547552 UserWarn
50% **kw) 8.007868
98.630000 -1.120762 -2.432479 0.918265
75% 167.670000 2.668930 3.636398 18.245043 1.643344
max 341.000000 50.309210 51.379382 92.186397 3.191675
plt.plot(data['总碳排放量'], data['能源消费强度效应'])
plt.xlabel('总碳排放量')
plt.ylabel('能源消费强度效应')
plt.title('能源消费强度效应随时间变化的趋势')
plt.show()
/usr/local/lib/python3.9/dist packages/IPython/core/pylabtools.py:151: UserWarn

# 导入必要的库
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# 导入数据
data = pd.read_csv("lmdiplus.csv")
# 将数据拆分成自变量和因变量
X = data.iloc[:, 1:].values
y = data.iloc[:, 0].values
# 对自变量进行标准化处理
sc = StandardScaler()
X = sc.fit_transform(X)
# 将数据拆分成训练集和测试集
# 定义神经网络模型
model = Sequential()
model.add(Dense(units=32, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(units=16, activation='relu'))
model.add(Dense(units=1))
# 编译模型
model.compile(optimizer='adam', loss='mean_squared_error')
# 训练模型
model.fit(X_train, y_train, epochs=100, batch_size=32)
# 评估模型
mse = model.evaluate(X_test, y_test)
print("均方误差：", mse)
# 使用模型进行预测
y_pred = model.predict(X_test)
Epoch 44/100
1/1 [==============================] - 0s 11ms/step - loss: 20587.5547
Epoch 45/100
1/1 [==============================] - 0s 12ms/step - loss: 20579.3535
Epoch 46/100
1/1 [==============================] - 0s 7ms/step - loss: 20570.9941
Epoch 47/100
1/1 [==============================] - 0s 22ms/step - loss: 20562.3379
Epoch 48/100
1/1 [==============================] - 0s 6ms/step - loss: 20553.5176
Epoch 49/100
1/1 [==============================] - 0s 6ms/step - loss: 20544.5254
Epoch 50/100
1/1 [==============================] - 0s 6ms/step - loss: 20535.3711
Epoch 51/100
1/1 [==============================] - 0s 7ms/step - loss: 20526.0801
Epoch 52/100
1/1 [==============================] - 0s 14ms/step - loss: 20516.6465
Epoch 53/100
1/1 [==============================] - 0s 8ms/step - loss: 20507.0430
Epoch 54/100
1/1 [==============================] - 0s 7ms/step - loss: 20497.2793
Epoch 55/100
1/1 [==============================] - 0s 10ms/step - loss: 20487.3340
Epoch 56/100
1/1 [==============================] - 0s 9ms/step - loss: 20477.0508
Epoch 57/100
1/1 [==============================] - 0s 9ms/step - loss: 20466.4375
Epoch 58/100
1/1 [==============================] - 0s 6ms/step - loss: 20455.6270
Epoch 59/100
1/1 [==============================] - 0s 9ms/step - loss: 20444.6191
Epoch 60/100
1/1 [==============================] - 0s 6ms/step - loss: 20433.4277
Epoch 61/100
1/1 [==============================] - 0s 11ms/step - loss: 20421.8809
Epoch 62/100
1/1 [==============================] - 0s 8ms/step - loss: 20410.0820
Epoch 63/100
1/1 [==============================] - 0s 10ms/step - loss: 20398.0020
Epoch 64/100
1/1 [==============================] - 0s 6ms/step - loss: 20385.5723
Epoch 65/100
1/1 [==============================] - 0s 5ms/step - loss: 20372.9062
Epoch 66/100
1/1 [==============================] - 0s 8ms/step - loss: 20360.0000
Epoch 67/100
1/1 [==============================] - 0s 5ms/step - loss: 20346.8418
Epoch 68/100
1/1 [==============================] - 0s 9ms/step - loss: 20333.4375
Epoch 69/100
1/1 [==============================] - 0s 8ms/step - loss: 20319.8027
Epoch 70/100
1/1 [==============================] - 0s 11ms/step - loss: 20305.9277
Epoch 71/100
1/1 [==============================] - 0s 10ms/step - loss: 20291.8184
Epoch 72/100
1/1 [==============================] - 0s 9ms/step - loss: 20277.4746
Epoch 73/100
import pandas as pd
df = pd.read_csv('lmdiplus.csv')
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)
0.8963126664634725

9

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

9

Uploaded by

Copyright:

Available Formats

import

Data points in cluster 0:

.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default val

Data points in cluster 0:

Mean Squared Error: 644.1467604355554

Coefficients: [ 0.34144153 0.3274244 0.51924317 103.84242724]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/

100%|██████████| 29/29 [00:00<00:00, 82.13it/s]Empty DataFrame

LinearSVC 0.99 0.99 0.99 0.99

/usr/local/lib/python3.9/dist packages/IPython/core/pylabtools.py:151: UserWarn

You might also like