注意事项
- 最好使用回归而不是分类,回归可以打分,可以人为划定阈值,从而调整灵敏度和特异度
参数调优-以SVM为例
# Exhaustive hyper-parameter search for an SVM classifier.
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Features: every column except the first (presumably an ID — verify)
# and the last, which is the label.
train_x = sample_feature_train_df.iloc[:, 1:-1]
train_y = sample_feature_train_df.iloc[:, -1]
test_x = sample_feature_test_df.iloc[:, 1:-1]
test_y = sample_feature_test_df.iloc[:, -1]

# Candidate values shared by the kernel-specific sub-grids below.
c_values = [0.1, 1, 10, 15, 20]
gamma_values = [10, 5, 1, 0.1, 0.01]
coef0_values = [0, 0.1, 1, 5, 10, 15]

param_grid = [
    {"kernel": ["rbf"], "C": c_values, "gamma": gamma_values},
    {"kernel": ["poly"], "C": c_values, "gamma": gamma_values,
     "degree": [3, 5, 10, 15, 20], "coef0": coef0_values},
    {"kernel": ["sigmoid"], "C": c_values, "gamma": gamma_values,
     "coef0": coef0_values},
]

# 4-fold cross-validated search over every combination in each sub-grid.
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=4)
grid.fit(train_x, train_y)
print('grid_best_params:', grid.best_params_)
print('grid.best_score_:', grid.best_score_)
随机森林
# Exhaustive hyper-parameter search for a random-forest classifier.
from sklearn.model_selection import GridSearchCV
# Bug fix: RandomForestClassifier was used below but never imported.
from sklearn.ensemble import RandomForestClassifier

# Features: every column except the first (presumably an ID — verify)
# and the last, which is the label.
train_x = sample_feature_train_df.iloc[:, 1:-1]
train_y = sample_feature_train_df.iloc[:, -1]
test_x = sample_feature_test_df.iloc[:, 1:-1]
test_y = sample_feature_test_df.iloc[:, -1]

# 参数搜索 / parameter search: forest size, split criterion, tree depth.
param_grid = [{
    "n_estimators": [10, 50, 80, 100, 150, 200, 300],
    "criterion": ['gini', 'entropy'],
    "max_depth": [None, 1, 3, 5, 10, 15, 20, 40],
}]
grid = GridSearchCV(RandomForestClassifier(), param_grid=param_grid, cv=5)
grid.fit(train_x, train_y)
print('grid_best_params:', grid.best_params_)
print('grid.best_score_:', grid.best_score_)
# 训练模型,并利用auc评估模型性能
# Train a random forest under repeated stratified K-fold CV and report the
# mean validation AUC. Assumes a binary label — verify for multi-class data.
# Bug fixes: all four names below were used without being imported anywhere
# in this file, and the loop body had lost its indentation.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import roc_auc_score
import numpy as np

rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=1, random_state=0)
auc_list = []
for train_index, valid_index in rskf.split(train_x, train_y):
    X_train1, y_train1 = train_x.iloc[train_index, :], train_y.iloc[train_index]
    X_valid1, y_valid1 = train_x.iloc[valid_index, :], train_y.iloc[valid_index]
    clf = RandomForestClassifier(n_estimators=100, max_depth=None,
                                 min_samples_split=2, random_state=0)
    clf.fit(X_train1, y_train1)
    # Bug fix: score AUC on class-1 probabilities, not hard predict() labels —
    # ROC-AUC needs a continuous score to rank samples (this is exactly the
    # scoring/threshold point made in the note at the top of this document).
    predict_value = clf.predict_proba(X_valid1)[:, 1]
    auc = roc_auc_score(y_valid1, predict_value)
    auc_list.append(auc)
print(np.mean(auc_list))
SVM
# Evaluate an SVM (default RBF kernel, C=10) under repeated stratified
# K-fold CV and report the mean validation AUC.
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
# Bug fixes: the three names below were used without being imported anywhere
# in this file, and the loop body had lost its indentation.
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import roc_auc_score
import numpy as np

# Features: every column except the first (presumably an ID — verify)
# and the last, which is the label.
train_x = sample_feature_train_df.iloc[:, 1:-1]
train_y = sample_feature_train_df.iloc[:, -1]
test_x = sample_feature_test_df.iloc[:, 1:-1]
test_y = sample_feature_test_df.iloc[:, -1]

# 训练模型,并利用auc评估模型性能
rskf = RepeatedStratifiedKFold(n_splits=10, n_repeats=1, random_state=0)
auc_list = []
for train_index, valid_index in rskf.split(train_x, train_y):
    X_train1, y_train1 = train_x.iloc[train_index, :], train_y.iloc[train_index]
    X_valid1, y_valid1 = train_x.iloc[valid_index, :], train_y.iloc[valid_index]
    clf = svm.SVC(C=10)
    clf.fit(X_train1, y_train1)
    # Bug fix: score AUC on the signed margin from decision_function, not hard
    # predict() labels — ROC-AUC needs a continuous score to rank samples.
    # Assumes a binary label — verify for multi-class data.
    predict_value = clf.decision_function(X_valid1)
    auc = roc_auc_score(y_valid1, predict_value)
    auc_list.append(auc)
print(np.mean(auc_list))
标签:10,auc,SVM,df,train,grid,随机,iloc,调优
From: https://www.cnblogs.com/kang1010/p/16851188.html