Paste: xgb
Author: z
Date: Wed, 19 Jul 2023 02:27:59
import xgboost as xgb
import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Load the dataset
data = np.loadtxt('dianli.csv', delimiter=',')
X = data[:, :-1]  # feature matrix
y = data[:, -1]   # target variable
# Split into training and test sets (chronological 80/20 split, no shuffling)
train_size = int(len(X) * 0.8)
train_X, test_X = X[:train_size], X[train_size:]
train_y, test_y = y[:train_size], y[train_size:]
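# Equivalent sketch using scikit-learn (an assumption, not from the original
# paste; shuffle=False preserves the row order, matching the manual split above):
# from sklearn.model_selection import train_test_split
# train_X, test_X, train_y, test_y = train_test_split(
#     X, y, test_size=0.2, shuffle=False)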
# Define the XGBoost regression model (default hyperparameters)
model = xgb.XGBRegressor()
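# Illustrative sketch (not from the original paste): XGBRegressor also accepts
# explicit hyperparameters; the values below are common starting points, not
# tuned for this dataset:
# model = xgb.XGBRegressor(
#     n_estimators=100,              # number of boosting rounds
#     learning_rate=0.1,             # shrinkage applied to each new tree
#     max_depth=6,                   # maximum depth of each tree
#     objective='reg:squarederror',  # squared-error regression objective
# )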
# Train the model
model.fit(train_X, train_y)
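# Optional sketch: pass an evaluation set to monitor test error during
# training (eval_set is part of XGBoost's scikit-learn API; this call would
# replace the plain fit above):
# model.fit(train_X, train_y, eval_set=[(test_X, test_y)], verbose=False)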
# Predict on the test set
y_pred = model.predict(test_X)
# Compute the root mean squared error (RMSE)
rmse = np.sqrt(mean_squared_error(test_y, y_pred))
print("Root Mean Squared Error:", rmse)
# Report the importance of each feature
importance = model.feature_importances_
# Generic names derived from the column count; replace with the actual feature names if known
feature_names = [f'Feature {i + 1}' for i in range(X.shape[1])]
sorted_indices = np.argsort(importance)[::-1]  # sort in descending order
sorted_importance = importance[sorted_indices]
sorted_feature_names = [feature_names[i] for i in sorted_indices]
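# Alternative sketch: xgboost ships a built-in importance plot; note it labels
# features by their booster names (f0, f1, ...) rather than feature_names:
# xgb.plot_importance(model)
# plt.show()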
# Visualize feature importance
plt.figure(figsize=(10, 6))
plt.bar(range(len(importance)), sorted_importance)
plt.xticks(range(len(importance)), sorted_feature_names, rotation=90)
plt.xlabel('Features')
plt.ylabel('Importance')
plt.title('Feature Importance')
plt.show()