import xgboost as xgb
import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load the dataset
data = np.loadtxt('dianli.csv', delimiter=',')
X = data[:, :-1]  # feature matrix
y = data[:, -1]   # target variable

# Split into training and test sets (a chronological 80/20 split, so no
# future samples leak into training for this time-series data)
train_size = int(len(X) * 0.8)
train_X, test_X = X[:train_size], X[train_size:]
train_y, test_y = y[:train_size], y[train_size:]

# Define the XGBoost model
model = xgb.XGBRegressor()

# Train the model
model.fit(train_X, train_y)

# Predict on the test set
y_pred = model.predict(test_X)

# Compute the root mean squared error (RMSE)
rmse = np.sqrt(mean_squared_error(test_y, y_pred))
print("Root Mean Squared Error:", rmse)

# Report the importance of each feature
importance = model.feature_importances_
# Replace these with the real feature names; generated generically here so
# the list length always matches the number of columns
feature_names = [f'Feature {i + 1}' for i in range(X.shape[1])]
sorted_indices = np.argsort(importance)[::-1]  # descending order
sorted_importance = importance[sorted_indices]
sorted_feature_names = [feature_names[i] for i in sorted_indices]

# Visualize feature importance
plt.figure(figsize=(10, 6))
plt.bar(range(len(importance)), sorted_importance)
plt.xticks(range(len(importance)), sorted_feature_names, rotation=90)
plt.xlabel('Features')
plt.ylabel('Importance')
plt.title('Feature Importance')
plt.show()
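
# The chronological split above uses a single cutoff, so the reported RMSE
# rests on one train/test boundary. For a more stable error estimate on
# ordered data, sklearn's TimeSeriesSplit evaluates the model over several
# expanding train/validation windows. This is a minimal sketch, not part of
# the original script; it reuses X and y from above.
from sklearn.model_selection import TimeSeriesSplit

tscv = TimeSeriesSplit(n_splits=5)
fold_rmse = []
for fold, (train_idx, val_idx) in enumerate(tscv.split(X)):
    fold_model = xgb.XGBRegressor()
    fold_model.fit(X[train_idx], y[train_idx])
    fold_pred = fold_model.predict(X[val_idx])
    fold_rmse.append(np.sqrt(mean_squared_error(y[val_idx], fold_pred)))
    print(f"Fold {fold}: RMSE = {fold_rmse[-1]:.4f}")
print("Mean RMSE across folds:", np.mean(fold_rmse))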
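
# The model above trains with default hyperparameters and a fixed number of
# trees. A common refinement is to hold out the tail of the training window
# as a validation set and let XGBoost stop adding trees once validation RMSE
# stops improving. A sketch under two assumptions: xgboost >= 1.6, where
# early_stopping_rounds is a constructor argument, and an illustrative 90/10
# split of the training window.
val_size = int(train_size * 0.9)
fit_X, val_X = train_X[:val_size], train_X[val_size:]
fit_y, val_y = train_y[:val_size], train_y[val_size:]

es_model = xgb.XGBRegressor(
    n_estimators=1000,         # upper bound; early stopping picks the actual count
    learning_rate=0.05,        # illustrative value, not tuned for this dataset
    early_stopping_rounds=50,  # stop after 50 rounds without validation improvement
    eval_metric='rmse',
)
es_model.fit(fit_X, fit_y, eval_set=[(val_X, val_y)], verbose=False)
print("Best iteration:", es_model.best_iteration)

# predict() uses the best iteration by default once early stopping has run
es_pred = es_model.predict(test_X)
print("Early-stopped RMSE:", np.sqrt(mean_squared_error(test_y, es_pred)))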
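
# The manual bar chart above plots model.feature_importances_, which reports a
# single importance type. XGBoost also ships a built-in plotting helper that
# can show other importance definitions; a brief sketch using 'gain' (average
# loss reduction from splits on a feature). With no feature names attached to
# the model, the axis labels default to f0, f1, ...
ax = xgb.plot_importance(model, importance_type='gain', max_num_features=10)
ax.set_title('Feature Importance (gain)')
plt.show()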