"""Tune an XGBoost regressor on ``dianli.csv`` and report its test error.

The script reads the CSV, treats every column except the last as a feature
and the last column as the regression target, grid-searches a small set of
XGBoost hyper-parameters with 5-fold cross-validation, then prints the mean
squared error on a 20% hold-out split and the fitted model's feature
importances sorted from highest to lowest.
"""
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error

# Load the dataset (path is relative to the current working directory).
data = pd.read_csv('dianli.csv')

# Split columns into features and target: the last column is the target,
# everything before it is a feature.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Feature engineering (apply whatever processing the actual data requires).

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base XGBoost model with library defaults; the grid search below overrides
# the tuned hyper-parameters.
xgb_model = xgb.XGBRegressor()

# Hyper-parameter tuning: exhaustive search over 3 x 3 x 3 = 27 combinations.
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5],
}
# NOTE: no `scoring` argument is given, so candidates are ranked by the
# estimator's own `score` method (R^2 for scikit-learn-style regressors),
# not by the MSE that is reported further down.
grid_search = GridSearchCV(xgb_model, param_grid, cv=5)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

# Predict on the hold-out set with the best model found by the search.
y_pred = best_model.predict(X_test)

# Compute and report the mean squared error (MSE) on the hold-out set.
mse = mean_squared_error(y_test, y_pred)
print("均方误差(MSE):", mse)

# Report feature importances, most important first.
importance = best_model.feature_importances_
feature_names = X.columns
feature_importance = pd.DataFrame(
    {'Feature': feature_names, 'Importance': importance}
)
sorted_importance = feature_importance.sort_values(
    by='Importance', ascending=False
)
print("特征重要性:")
print(sorted_importance)