参考:数据科学 | 避坑!Python特征重要性分析中存在的问题
模型代码(复制前一个博客的内容):
查看代码
# Train a LightGBM regressor on the statistics features from the previous post.
# NOTE(review): stat_data / mode_data come from the earlier blog's code — confirm
# they are a 25-column feature matrix and the regression target, respectively.
import lightgbm as lgb
from sklearn.datasets import load_iris, make_classification  # kept: unused here, may be used elsewhere
from sklearn.metrics import mean_squared_error  # kept: unused here, may be used elsewhere
from sklearn.model_selection import GridSearchCV, train_test_split

# Hold out 20% for evaluation / early stopping.
# NOTE(review): no random_state is set, so the split (and hence the reported
# feature importances) changes on every run — pin one for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(stat_data, mode_data, test_size=0.2)

gbm = lgb.LGBMRegressor(
    objective='regression',
    num_leaves=1024,
    learning_rate=0.05,
    n_estimators=512,
    max_depth=15,
)
# The early_stopping_rounds= fit keyword was removed in LightGBM 4.0; the
# callback form below is the supported spelling and also works on 3.3+.
gbm.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='l1',
    callbacks=[lgb.early_stopping(stopping_rounds=5)],
)
# Predict with the best iteration found by early stopping.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
特征重要性代码:
# Rank and visualize LightGBM feature importances.
# Column names for the 25 features: 5 summary stats (mean/median/std/25p/75p)
# over 5 signals (di/ve/ac/be/br) — order must match the training matrix columns.
feature_list = ['di_mean', "di_median", "di_std", "di_25p", "di_75p",
                've_mean', "ve_median", "ve_std", "ve_25p", "ve_75p",
                'ac_mean', "ac_median", "ac_std", "ac_25p", "ac_75p",
                'be_mean', "be_median", "be_std", "be_25p", "be_75p",
                'br_mean', "br_median", "br_std", "br_25p", "br_75p",]

# Importances reported by the fitted sklearn wrapper (split counts by default).
importances = gbm.feature_importances_
# Feature indices ordered from most to least important.
indices = np.argsort(importances)[::-1]

# Print the ranking and collect the feature names in ranked order.
print("Feature ranking:")
featurerank = []
for rank, idx in enumerate(indices, start=1):
    featurerank.append(feature_list[idx])
    print("%d. feature %s (%f)" % (rank, feature_list[idx], importances[idx]))

# Visualize the importances as a horizontal bar chart, largest first.
plt.figure(figsize=(15, 12))
feature_imp = pd.Series(importances, index=feature_list).sort_values(ascending=False)
sns.barplot(x=feature_imp, y=feature_imp.index)
plt.xlabel('Feature Importance Score')
plt.ylabel('Features')
plt.title("Visualizing Important Features")
# plt.legend() removed: no artist here carries a label, so the call only
# emitted a "No artists with labels found" warning and drew nothing.
#plt.show()
plt.savefig("Important Features.pdf", dpi=200, bbox_inches='tight')
可视化效果:
标签:plt,feature,train,可视化,重要性,test,import,800,mean From: https://www.cnblogs.com/alex-bn-lee/p/17052018.html