探索核函数的优势和缺陷
# Explore the strengths and weaknesses of SVM kernel functions on the
# sklearn breast-cancer dataset.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
from time import time
import datetime
cancer = load_breast_cancer()
x = cancer.data    # feature matrix, 569 samples x 30 features (see describe() below)
y = cancer.target  # binary class labels (0/1)
# Quick look at the first two RAW features, colored by class.
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()
from sklearn.decomposition import PCA
# Project the 30-D data onto its first two principal components for a
# clearer 2-D view of class separability.
x_dr = PCA(n_components=2).fit_transform(x)
plt.scatter(x_dr[:, 0], x_dr[:, 1], c=y)
plt.show()
from mpl_toolkits import mplot3d

# Lift the data onto a third axis with a Gaussian-style radial feature
# r_i = exp(-||x_i||^2).
# NOTE(review): the original comment claimed r.shape == (300,) — that matches
# the tutorial's make_circles demo, but here r.shape is (569,). Also, on the
# raw (unscaled) features ||x_i||^2 is huge, so r underflows to ~0 for most
# samples — confirm this visualization is intended on unscaled data.
r = np.exp(-(x**2).sum(1))
rlim = np.linspace(min(r), max(r), 500)  # value range of r (not used below)


def plot_3D(elev=45, azim=30, x=x, y=y) -> None:
    """Scatter the data in 3-D using the radial feature r as the z-axis.

    elev: up/down rotation angle of the 3-D view.
    azim: horizontal rotation angle of the 3-D view.
    x, y: data and labels (default to the module-level arrays).
    """
    ax = plt.subplot(projection="3d")
    ax.scatter3D(x[:, 0], x[:, 1], r, c=y, s=50, cmap="rainbow")
    ax.view_init(elev=elev, azim=azim)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("r")
    plt.show()


plot_3D()
# Baseline benchmark: fit each of the four kernels on the RAW (unscaled)
# features and report test accuracy plus cumulative elapsed time.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=420
)
kernel = ["linear", "poly", "rbf", "sigmoid"]
time0 = time()
for name in kernel:
    model = SVC(kernel=name, gamma="auto", degree=1, cache_size=5000)
    model.fit(x_train, y_train)
    accuracy = model.score(x_test, y_test)
    print("The accuracy under kernel %s is %f" % (name, accuracy))
    # NOTE(review): formatting a duration via fromtimestamp assumes a
    # whole-hour UTC offset; %M:%S:%f is unaffected in most timezones.
    print(datetime.datetime.fromtimestamp(time() - time0).strftime("%M:%S:%f"))
The accuracy under kernel linear is 0.929825
00:00:278035
The accuracy under kernel poly is 0.923977
00:00:311496
The accuracy under kernel rbf is 0.596491
00:00:329552
The accuracy under kernel sigmoid is 0.596491
00:00:336953
import pandas as pd

# Inspect the per-feature distributions: the huge spread between columns
# (e.g. ~0.002 vs ~2500) shows the features are on very different scales
# and need standardization before using distance-based kernels.
data = pd.DataFrame(x)
data.describe(percentiles=[0.1, 0.15, 0.25, 0.50, 0.75, 0.90, 0.99]).T
count | mean | std | min | 10% | 15% | 25% | 50% | 75% | 90% | 99% | max | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 569.0 | 14.127292 | 3.524049 | 6.981000 | 10.260000 | 11.040000 | 11.700000 | 13.370000 | 15.780000 | 19.530000 | 24.371600 | 28.11000 |
1 | 569.0 | 19.289649 | 4.301036 | 9.710000 | 14.078000 | 14.934000 | 16.170000 | 18.840000 | 21.800000 | 24.992000 | 30.652000 | 39.28000 |
2 | 569.0 | 91.969033 | 24.298981 | 43.790000 | 65.830000 | 70.670000 | 75.170000 | 86.240000 | 104.100000 | 129.100000 | 165.724000 | 188.50000 |
3 | 569.0 | 654.889104 | 351.914129 | 143.500000 | 321.600000 | 371.180000 | 420.300000 | 551.100000 | 782.700000 | 1177.400000 | 1786.600000 | 2501.00000 |
4 | 569.0 | 0.096360 | 0.014064 | 0.052630 | 0.079654 | 0.081948 | 0.086370 | 0.095870 | 0.105300 | 0.114820 | 0.132888 | 0.16340 |
5 | 569.0 | 0.104341 | 0.052813 | 0.019380 | 0.049700 | 0.053620 | 0.064920 | 0.092630 | 0.130400 | 0.175460 | 0.277192 | 0.34540 |
6 | 569.0 | 0.088799 | 0.079720 | 0.000000 | 0.013686 | 0.019932 | 0.029560 | 0.061540 | 0.130700 | 0.203040 | 0.351688 | 0.42680 |
7 | 569.0 | 0.048919 | 0.038803 | 0.000000 | 0.011158 | 0.014366 | 0.020310 | 0.033500 | 0.074000 | 0.100420 | 0.164208 | 0.20120 |
8 | 569.0 | 0.181162 | 0.027414 | 0.106000 | 0.149580 | 0.154620 | 0.161900 | 0.179200 | 0.195700 | 0.214940 | 0.259564 | 0.30400 |
9 | 569.0 | 0.062798 | 0.007060 | 0.049960 | 0.055338 | 0.056302 | 0.057700 | 0.061540 | 0.066120 | 0.072266 | 0.085438 | 0.09744 |
10 | 569.0 | 0.405172 | 0.277313 | 0.111500 | 0.183080 | 0.202840 | 0.232400 | 0.324200 | 0.478900 | 0.748880 | 1.291320 | 2.87300 |
11 | 569.0 | 1.216853 | 0.551648 | 0.360200 | 0.640400 | 0.710700 | 0.833900 | 1.108000 | 1.474000 | 1.909400 | 2.915440 | 4.88500 |
12 | 569.0 | 2.866059 | 2.021855 | 0.757000 | 1.280200 | 1.435400 | 1.606000 | 2.287000 | 3.357000 | 5.123200 | 9.690040 | 21.98000 |
13 | 569.0 | 40.337079 | 45.491006 | 6.802000 | 13.160000 | 14.938000 | 17.850000 | 24.530000 | 45.190000 | 91.314000 | 177.684000 | 542.20000 |
14 | 569.0 | 0.007041 | 0.003003 | 0.001713 | 0.004224 | 0.004539 | 0.005169 | 0.006380 | 0.008146 | 0.010410 | 0.017258 | 0.03113 |
15 | 569.0 | 0.025478 | 0.017908 | 0.002252 | 0.009169 | 0.010860 | 0.013080 | 0.020450 | 0.032450 | 0.047602 | 0.089872 | 0.13540 |
16 | 569.0 | 0.031894 | 0.030186 | 0.000000 | 0.007726 | 0.010734 | 0.015090 | 0.025890 | 0.042050 | 0.058520 | 0.122292 | 0.39600 |
17 | 569.0 | 0.011796 | 0.006170 | 0.000000 | 0.005493 | 0.006325 | 0.007638 | 0.010930 | 0.014710 | 0.018688 | 0.031194 | 0.05279 |
18 | 569.0 | 0.020542 | 0.008266 | 0.007882 | 0.013012 | 0.013746 | 0.015160 | 0.018730 | 0.023480 | 0.030120 | 0.052208 | 0.07895 |
19 | 569.0 | 0.003795 | 0.002646 | 0.000895 | 0.001710 | 0.001892 | 0.002248 | 0.003187 | 0.004558 | 0.006185 | 0.012650 | 0.02984 |
20 | 569.0 | 16.269190 | 4.833242 | 7.930000 | 11.234000 | 11.996000 | 13.010000 | 14.970000 | 18.790000 | 23.682000 | 30.762800 | 36.04000 |
21 | 569.0 | 25.677223 | 6.146258 | 12.020000 | 17.800000 | 19.252000 | 21.080000 | 25.410000 | 29.720000 | 33.646000 | 41.802400 | 49.54000 |
22 | 569.0 | 107.261213 | 33.602542 | 50.410000 | 72.178000 | 77.984000 | 84.110000 | 97.660000 | 125.400000 | 157.740000 | 208.304000 | 251.20000 |
23 | 569.0 | 880.583128 | 569.356993 | 185.200000 | 384.720000 | 440.080000 | 515.300000 | 686.500000 | 1084.000000 | 1673.000000 | 2918.160000 | 4254.00000 |
24 | 569.0 | 0.132369 | 0.022832 | 0.071170 | 0.102960 | 0.108620 | 0.116600 | 0.131300 | 0.146000 | 0.161480 | 0.188908 | 0.22260 |
25 | 569.0 | 0.254265 | 0.157336 | 0.027290 | 0.093676 | 0.108820 | 0.147200 | 0.211900 | 0.339100 | 0.447840 | 0.778644 | 1.05800 |
26 | 569.0 | 0.272188 | 0.208624 | 0.000000 | 0.045652 | 0.071612 | 0.114500 | 0.226700 | 0.382900 | 0.571320 | 0.902380 | 1.25200 |
27 | 569.0 | 0.114606 | 0.065732 | 0.000000 | 0.038460 | 0.050026 | 0.064930 | 0.099930 | 0.161400 | 0.208940 | 0.269216 | 0.29100 |
28 | 569.0 | 0.290076 | 0.061867 | 0.156500 | 0.226120 | 0.235100 | 0.250400 | 0.282200 | 0.317900 | 0.360080 | 0.486908 | 0.66380 |
29 | 569.0 | 0.083946 | 0.018061 | 0.055040 | 0.065792 | 0.067848 | 0.071460 | 0.080040 | 0.092080 | 0.106320 | 0.140628 | 0.20750 |
from sklearn.preprocessing import StandardScaler

# Standardize every feature to zero mean / unit variance (in place on x).
scaler = StandardScaler()
x = scaler.fit_transform(x)
count | mean | std | min | 10% | 15% | 25% | 50% | 75% | 90% | 99% | max | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 569.0 | -2.497514e-17 | 1.00088 | -2.029648 | -1.098366 | -0.876835 | -0.689385 | -0.215082 | 0.469393 | 1.534446 | 2.909529 | 3.971288 |
1 | 569.0 | -2.497514e-17 | 1.00088 | -2.229249 | -1.212786 | -1.013589 | -0.725963 | -0.104636 | 0.584176 | 1.326975 | 2.644095 | 4.651889 |
2 | 569.0 | 0.000000e+00 | 1.00088 | -1.984504 | -1.076672 | -0.877311 | -0.691956 | -0.235980 | 0.499677 | 1.529432 | 3.037982 | 3.976130 |
3 | 569.0 | 3.746271e-17 | 1.00088 | -1.454443 | -0.947908 | -0.806898 | -0.667195 | -0.295187 | 0.363507 | 1.486075 | 3.218702 | 5.250529 |
4 | 569.0 | -6.243785e-17 | 1.00088 | -3.112085 | -1.188910 | -1.025656 | -0.710963 | -0.034891 | 0.636199 | 1.313694 | 2.599511 | 4.770911 |
5 | 569.0 | -1.248757e-17 | 1.00088 | -1.610136 | -1.035527 | -0.961238 | -0.747086 | -0.221940 | 0.493857 | 1.347811 | 3.275782 | 4.568425 |
6 | 569.0 | 1.373633e-16 | 1.00088 | -1.114873 | -0.943046 | -0.864627 | -0.743748 | -0.342240 | 0.526062 | 1.434288 | 3.300560 | 4.243589 |
7 | 569.0 | 6.243785e-17 | 1.00088 | -1.261820 | -0.974010 | -0.891263 | -0.737944 | -0.397721 | 0.646935 | 1.328412 | 2.973759 | 3.927930 |
8 | 569.0 | 1.248757e-16 | 1.00088 | -2.744117 | -1.153036 | -0.969028 | -0.703240 | -0.071627 | 0.530779 | 1.233221 | 2.862418 | 4.484751 |
9 | 569.0 | -3.121893e-17 | 1.00088 | -1.819865 | -1.057477 | -0.920820 | -0.722639 | -0.178279 | 0.470983 | 1.342243 | 3.209454 | 4.910919 |
10 | 569.0 | -8.741299e-17 | 1.00088 | -1.059924 | -0.801577 | -0.730259 | -0.623571 | -0.292245 | 0.266100 | 1.240514 | 3.198294 | 8.906909 |
11 | 569.0 | -2.185325e-17 | 1.00088 | -1.554264 | -1.045885 | -0.918336 | -0.694809 | -0.197498 | 0.466552 | 1.256518 | 3.081820 | 6.655279 |
12 | 569.0 | 7.492542e-17 | 1.00088 | -1.044049 | -0.785049 | -0.708220 | -0.623768 | -0.286652 | 0.243031 | 1.117354 | 3.378079 | 9.461986 |
13 | 569.0 | 5.619407e-17 | 1.00088 | -0.737829 | -0.597942 | -0.558823 | -0.494754 | -0.347783 | 0.106773 | 1.121579 | 3.021867 | 11.041842 |
14 | 569.0 | 1.404852e-17 | 1.00088 | -1.776065 | -0.939031 | -0.834027 | -0.624018 | -0.220335 | 0.368355 | 1.123053 | 3.405812 | 8.029999 |
15 | 569.0 | 4.370650e-17 | 1.00088 | -1.298098 | -0.911510 | -0.817001 | -0.692926 | -0.281020 | 0.389654 | 1.236492 | 3.598943 | 6.143482 |
16 | 569.0 | 6.243785e-17 | 1.00088 | -1.057501 | -0.801336 | -0.701593 | -0.557161 | -0.199065 | 0.336752 | 0.882848 | 2.997338 | 12.072680 |
17 | 569.0 | -1.873136e-17 | 1.00088 | -1.913447 | -1.022462 | -0.887439 | -0.674490 | -0.140496 | 0.472657 | 1.117927 | 3.146456 | 6.649601 |
18 | 569.0 | -2.497514e-17 | 1.00088 | -1.532890 | -0.911757 | -0.822886 | -0.651681 | -0.219430 | 0.355692 | 1.159654 | 3.834036 | 7.071917 |
19 | 569.0 | 3.121893e-17 | 1.00088 | -1.096968 | -0.788466 | -0.719776 | -0.585118 | -0.229940 | 0.288642 | 0.904208 | 3.349301 | 9.851593 |
20 | 569.0 | 4.995028e-17 | 1.00088 | -1.726901 | -1.042700 | -0.884903 | -0.674921 | -0.269040 | 0.522016 | 1.535063 | 3.001373 | 4.094189 |
21 | 569.0 | -3.746271e-17 | 1.00088 | -2.223994 | -1.282757 | -1.046308 | -0.748629 | -0.043516 | 0.658341 | 1.297666 | 2.625885 | 3.885905 |
22 | 569.0 | -2.497514e-17 | 1.00088 | -1.693361 | -1.044983 | -0.872046 | -0.689578 | -0.285980 | 0.540279 | 1.503553 | 3.009644 | 4.287337 |
23 | 569.0 | 1.123881e-16 | 1.00088 | -1.222423 | -0.871684 | -0.774366 | -0.642136 | -0.341181 | 0.357589 | 1.393000 | 3.581882 | 5.930172 |
24 | 569.0 | 9.990056e-17 | 1.00088 | -2.682695 | -1.289152 | -1.041041 | -0.691230 | -0.046843 | 0.597545 | 1.276124 | 2.478455 | 3.955374 |
25 | 569.0 | 6.243785e-17 | 1.00088 | -1.443878 | -1.021571 | -0.925234 | -0.681083 | -0.269501 | 0.539669 | 1.231407 | 3.335783 | 5.112877 |
26 | 569.0 | -4.995028e-17 | 1.00088 | -1.305831 | -1.086814 | -0.962270 | -0.756514 | -0.218232 | 0.531141 | 1.435090 | 3.023359 | 4.700669 |
27 | 569.0 | -1.248757e-17 | 1.00088 | -1.745063 | -1.159448 | -0.983337 | -0.756400 | -0.223469 | 0.712510 | 1.436382 | 2.354181 | 2.685877 |
28 | 569.0 | -1.123881e-16 | 1.00088 | -2.160960 | -1.034661 | -0.889384 | -0.641864 | -0.127409 | 0.450138 | 1.132518 | 3.184317 | 6.046041 |
29 | 569.0 | -7.492542e-17 | 1.00088 | -1.601839 | -1.006009 | -0.892074 | -0.691912 | -0.216444 | 0.450762 | 1.239884 | 3.141089 | 6.846856 |
# Re-run the four-kernel benchmark, now on the STANDARDIZED features.
# All kernels improve dramatically and run orders of magnitude faster.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=420
)
kernel = ["linear", "poly", "rbf", "sigmoid"]
time0 = time()
for kern in kernel:
    fitted = SVC(
        kernel=kern, gamma="auto", degree=1, cache_size=5000
    ).fit(x_train, y_train)
    print("The accuracy under kernel %s is %f" % (kern, fitted.score(x_test, y_test)))
    print(datetime.datetime.fromtimestamp(time() - time0).strftime("%M:%S:%f"))
The accuracy under kernel linear is 0.976608
00:00:003820
The accuracy under kernel poly is 0.964912
00:00:007333
The accuracy under kernel rbf is 0.970760
00:00:013197
The accuracy under kernel sigmoid is 0.953216
00:00:019282
# Scan gamma for the rbf kernel on a log scale and plot test accuracy.
gamma_range = np.logspace(-10, 1, 50)
score = [
    SVC(kernel="rbf", gamma=g, degree=1, cache_size=5000)
    .fit(x_train, y_train)
    .score(x_test, y_test)
    for g in gamma_range
]
best = int(np.argmax(score))  # first index of the maximum, like index(max(...))
print(score[best], gamma_range[best])
plt.plot(gamma_range, score)
plt.show()
0.9766081871345029 0.012067926406393264
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

# Jointly tune gamma and coef0 for the degree-1 poly kernel, using a
# stratified shuffle-split cross-validation (5 splits, 30% test).
gamma_range = np.logspace(-10, 1, 20)
coef0_range = np.linspace(0, 5, 10)
param = {"gamma": gamma_range, "coef0": coef0_range}
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=420)
grid = GridSearchCV(
    SVC(kernel="poly", cache_size=5000, degree=1),
    param_grid=param,
    cv=cv,
)
grid.fit(x, y)
print(
    "The best parameters are %s with a score of %0.5f"
    % (grid.best_params_, grid.best_score_)
)
print(datetime.datetime.fromtimestamp(time() - time0).strftime("%M:%S:%f"))
The best parameters are {'coef0': 0.0, 'gamma': 0.18329807108324375} with a score of 0.96959
41:01:513573
重要参数 C
# Important parameter C: coarse scan over (0.01, 30) in 50 steps, linear kernel.
C_range = np.linspace(0.01, 30, 50)
score = []
for penalty in C_range:
    model = SVC(kernel="linear", C=penalty, cache_size=5000)
    model.fit(x_train, y_train)
    score.append(model.score(x_test, y_test))
idx = score.index(max(score))
print(score[idx], C_range[idx])
plt.plot(C_range, score)
plt.show()
0.9766081871345029 1.2340816326530613
# Same coarse C scan over (0.01, 30) in 50 steps, now for the rbf kernel.
C_range = np.linspace(0.01, 30, 50)
score = [
    SVC(kernel="rbf", C=c, cache_size=5000)
    .fit(x_train, y_train)
    .score(x_test, y_test)
    for c in C_range
]
top = score.index(max(score))
print(score[top], C_range[top])
plt.plot(C_range, score)
plt.show()
0.9883040935672515 25.103673469387758
# Refine the C search around the optimum (~25) found by the coarse scan.
# NOTE(review): the original paste repeated the coarse grid
# np.linspace(0.01, 30, 50) here, which is no refinement at all. The
# reported best C of 25.026315789473685 (= 24 + 39/38) is consistent with
# the finer grid below — confirm against the author's intended range.
C_range = np.linspace(24, 26, 77)  # step 1/38 around the coarse optimum
score = []
for c in C_range:
    clf = SVC(kernel="rbf", C=c, cache_size=5000).fit(x_train, y_train)
    score.append(clf.score(x_test, y_test))
print(max(score), C_range[score.index(max(score))])
plt.plot(C_range, score)
plt.show()
print(datetime.datetime.fromtimestamp(time() - time0).strftime("%M:%S:%f"))
0.9883040935672515 25.026315789473685
10:56:038874
# Validate C with 10-fold cross-validation on the full standardized data,
# zooming into the (3, 4) interval.
from sklearn.model_selection import cross_val_score

C_range = np.linspace(3, 4, 50)
score = []
for penalty in C_range:
    model = SVC(kernel="rbf", C=penalty, cache_size=5000)
    score.append(cross_val_score(model, x, y, cv=10).mean())
winner = score.index(max(score))
print(score[winner], C_range[winner])
plt.plot(C_range, score)
plt.show()
print(datetime.datetime.fromtimestamp(time() - time0).strftime("%M:%S:%f"))
0.982456140350877 3.0
24:23:139221
# Final model: rbf kernel with the cross-validated C=3 on standardized data.
clf = SVC(kernel="rbf", C=3, gamma="auto", cache_size=5000).fit(x_train, y_train)
clf.score(x_test, y_test)  # test-set accuracy (value echoed by the notebook cell)
clf.get_params()  # inspect the fitted model's full parameter set (shown below)
{'C': 3,
'break_ties': False,
'cache_size': 5000,
'class_weight': None,
'coef0': 0.0,
'decision_function_shape': 'ovr',
'degree': 3,
'gamma': 'auto',
'kernel': 'rbf',
'max_iter': -1,
'probability': False,
'random_state': None,
'shrinking': True,
'tol': 0.001,
'verbose': False}
标签:kernel,函数,探索,569.0,1.00088,range,score,17,缺陷
From: https://www.cnblogs.com/thankcat/p/17353083.html