首页 > 其他分享 > 向数据集加了一堆均值并没有提高多少

向数据集加了一堆均值并没有提高多少

时间:2022-12-05 12:44:31 浏览次数:38
标签:一堆 df reshape 均值 集加 np diff array axis

"""Row-wise statistical feature engineering plus a gradient-boosting grid search.

Author's note (translated from the original post): accuracy is 0.8; stuffing
in extra features indiscriminately did not seem to bring much improvement.

The script reads ``train.csv``, drops its ``ID`` column, uses the last
remaining column as the class label and every other column as the raw
features (the original carried the comment ``(210,240)``, presumably the
shape of that raw feature matrix -- confirm against the data).  Each row is
treated as one signal: summary statistics are computed on the raw row and on
its FFT amplitude spectrum, everything is concatenated into one matrix, and a
grid of ``GradientBoostingClassifier`` settings is scored with 10-fold
cross-validation.
"""

import numpy as np
import pandas as pd
import torch
import torch.fft  # noqa: F401 -- kept from the original imports; guarantees the submodule is loaded
from scipy import stats


def summary_features(signal, *, with_range=True):
    """Return per-row NumPy summary statistics of a 2-D array.

    Columns, in order: ``range`` (only when *with_range* is true), ``min``,
    ``max``, ``var``, ``std``, ``mean``, ``median``.  Variance and standard
    deviation use NumPy's default ``ddof=0``.

    Args:
        signal: 2-D array, one signal per row.
        with_range: Prepend the ``max - min`` column when true.

    Returns:
        Array of shape ``(n_rows, 7)`` or ``(n_rows, 6)``.
    """
    lowest = signal.min(axis=-1)
    highest = signal.max(axis=-1)
    columns = [
        lowest,
        highest,
        np.var(signal, axis=1),
        np.std(signal, axis=1),
        np.mean(signal, axis=1),
        np.median(signal, axis=1),
    ]
    if with_range:
        # The range is max - min; the original computed min - max, which
        # yields the negated value.
        columns.insert(0, highest - lowest)
    return np.column_stack(columns)


def shape_features(signal):
    """Return per-row skewness and the 25th / 75th percentiles (pandas estimators).

    Args:
        signal: 2-D array, one signal per row.

    Returns:
        Array of shape ``(n_rows, 3)`` with columns ``skew``, ``q25``, ``q75``.
    """
    frame = pd.DataFrame(signal)
    return np.column_stack([
        frame.skew(axis=1).to_numpy(),
        frame.quantile(q=0.25, axis=1).to_numpy(),
        frame.quantile(q=0.75, axis=1).to_numpy(),
    ])


def difference_features(signal):
    """Return per-row statistics of the first difference taken along each row.

    The original called ``df.diff(1)``, which differences along axis 0, i.e.
    between neighbouring *rows*; that leaves the first row all-NaN and makes
    every statistic depend on the order of the samples.  Here the difference
    is taken between neighbouring values inside one row, which matches the
    row-wise (``axis=1``) statistics applied afterwards.

    Columns, in order: ``max``, ``min``, ``range``, ``var``, ``std``, ``mean``,
    ``median``, ``skew``, ``q25``, ``q75``, ``kurtosis`` (pandas estimators,
    so ``var``/``std`` use ``ddof=1``).

    Args:
        signal: 2-D array, one signal per row.

    Returns:
        Array of shape ``(n_rows, 11)``.
    """
    steps = pd.DataFrame(np.diff(signal, axis=1))
    largest = steps.max(axis=1)
    smallest = steps.min(axis=1)
    return np.column_stack([
        largest.to_numpy(),
        smallest.to_numpy(),
        (largest - smallest).to_numpy(),
        steps.var(axis=1).to_numpy(),
        steps.std(axis=1).to_numpy(),
        steps.mean(axis=1).to_numpy(),
        steps.median(axis=1).to_numpy(),
        steps.skew(axis=1).to_numpy(),
        steps.quantile(q=0.25, axis=1).to_numpy(),
        steps.quantile(q=0.75, axis=1).to_numpy(),
        steps.kurtosis(axis=1).to_numpy(),
    ])


def amplitude_spectrum(signal):
    """Return the FFT amplitude spectrum of every row as a float32 array.

    The magnitude of the full (two-sided) FFT is scaled by ``2 / n_samples``.
    The original hard-coded ``n_samples`` as 240; the row length is used
    instead so other signal lengths are scaled the same way.

    Args:
        signal: 2-D array, one signal per row.

    Returns:
        Array with the same shape as *signal*.
    """
    n_samples = signal.shape[1]
    # float32 matches what ``torch.Tensor(ndarray)`` produced in the original.
    tensor = torch.as_tensor(signal, dtype=torch.float32)
    magnitude = torch.abs(torch.fft.fft(tensor)) / n_samples * 2
    return magnitude.detach().numpy()


def build_feature_matrix(signal):
    """Concatenate raw values, spectrum and all derived statistics per row.

    Column layout, left to right (``n`` is the row length):

    * spectrum ``shape_features`` (3) and ``difference_features`` (11)
    * raw ``difference_features`` (11) and ``shape_features`` (3)
    * raw kurtosis from ``scipy.stats.kurtosis`` (1)
    * raw values (``n``)
    * raw ``summary_features`` including range (7)
    * spectrum values (``n``)
    * spectrum ``summary_features`` without range (6)

    This is the same set of features the original assembled; the range of the
    spectrum differences is now computed from the spectrum differences
    themselves (the original reused the raw-signal columns by mistake).

    Args:
        signal: 2-D array-like of numeric values, one signal per row.

    Returns:
        Array of shape ``(n_rows, 42 + 2 * n)``.

    Raises:
        ValueError: If *signal* is not two-dimensional.
    """
    signal = np.asarray(signal, dtype=float)
    if signal.ndim != 2:
        raise ValueError("signal must be a 2-D array with one signal per row")

    spectrum = amplitude_spectrum(signal)
    raw_kurtosis = np.reshape(stats.kurtosis(signal, axis=1), (-1, 1))
    blocks = (
        shape_features(spectrum),
        difference_features(spectrum),
        difference_features(signal),
        shape_features(signal),
        raw_kurtosis,
        signal,
        summary_features(signal, with_range=True),
        spectrum,
        summary_features(spectrum, with_range=False),
    )
    return np.concatenate(blocks, axis=1)


def grid_search(features, labels, *, estimator_steps=300, depth_steps=20,
                folds=10, target=0.9):
    """Score a grid of gradient-boosting models and print every mean accuracy.

    For ``i`` in ``range(estimator_steps)`` and ``j`` in ``range(depth_steps)``
    a ``GradientBoostingClassifier`` with ``n_estimators=5*i+1`` and
    ``max_depth=j+1`` is cross-validated; ``i``, ``j`` and the mean score are
    printed, followed by the marker ``123`` whenever the mean reaches *target*.

    The original post also left two alternatives commented out: a
    ``DecisionTreeClassifier`` (gini, ``max_depth=i``) and a
    ``RandomForestClassifier`` (gini, ``n_estimators=10*i+1``,
    ``max_depth=j+1``).

    Args:
        features: 2-D feature matrix.
        labels: 1-D array of class labels.
        estimator_steps: Number of ``n_estimators`` settings to try.
        depth_steps: Number of ``max_depth`` settings to try.
        folds: Number of cross-validation folds.
        target: Mean score at which the marker line is printed.
    """
    # Imported here so the feature helpers stay usable without scikit-learn.
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.model_selection import cross_val_score

    for i in range(estimator_steps):
        for j in range(depth_steps):
            # 'mse' was renamed to 'squared_error' in scikit-learn 1.0 and the
            # old spelling was removed in 1.2.
            clf = GradientBoostingClassifier(
                criterion='squared_error',
                n_estimators=5 * i + 1,
                max_depth=j + 1,
            )
            mean_score = cross_val_score(clf, features, labels, cv=folds).mean()
            print(i, j, mean_score)
            if mean_score >= target:
                print('123')


def main(csv_path='train.csv'):
    """Load the training table, build the feature matrix and run the search.

    Args:
        csv_path: CSV file with an ``ID`` column, feature columns and the
            label in the last column.
    """
    table = pd.read_csv(csv_path).drop(['ID'], axis=1)
    feature = table.iloc[:, :-1].to_numpy(dtype=float)
    label = table.iloc[:, -1].to_numpy()
    ne = build_feature_matrix(feature)
    # The original passed the raw ``feature`` columns here, so none of the
    # engineered statistics were ever evaluated.
    grid_search(ne, label)


if __name__ == "__main__":
    main()

标签:一堆,df,reshape,均值,集加,np,diff,array,axis
From: https://www.cnblogs.com/hahaah/p/16951980.html

相关文章