Paste: xgb
Author: | z |
Mode: | factor |
Date: | Wed, 19 Jul 2023 05:15:42 |
Plain Text |
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
# Load the dataset from the CSV file.
data = pd.read_csv('dianli.csv')

# Every column except the last is a feature; the last column is the target.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Feature engineering (placeholder: add feature processing here as needed).

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate hyper-parameter values explored by the grid search.
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.05, 0.1, 0.2],
    max_depth=[3, 4, 5],
)

# Tune a default-configured XGBoost regressor over the grid with cv=5,
# then keep the estimator the search reports as best.
grid_search = GridSearchCV(
    estimator=xgb.XGBRegressor(),
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

# Predict on the held-out rows and report the mean squared error (MSE).
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("均方误差(MSE):", mse)

# Pair each feature column name with the best model's importance score and
# print the table ordered from most to least important.
sorted_importance = pd.DataFrame(
    {
        'Feature': X.columns,
        'Importance': best_model.feature_importances_,
    }
).sort_values('Importance', ascending=False)
print("特征重要性:")
print(sorted_importance)
New Annotation