一、Lasso 回归选取关键属性
#%%
# Step 1: use Lasso regression to pick the explanatory variables that carry a
# non-zero coefficient for the target column `y`, and save only those columns.
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso

inputfile = './data/data.csv'
data = pd.read_csv(inputfile)

# Fit on every column except the target. The earlier positional slice
# `iloc[:, 0:14]` pulls `y` itself into the feature matrix whenever the file
# holds x1..x13 followed by y, which leaks the target into the regression.
# NOTE(review): the column layout of data.csv is not visible here - confirm.
features = data.drop(columns=['y'])

lasso = Lasso(1000)  # first positional argument is alpha, the L1 penalty weight
lasso.fit(features, data['y'])
print('相关系数为 :', np.round(lasso.coef_, 5))

#%%
print('相关系数非零个数为:', np.sum(lasso.coef_ != 0))

#%%
# Boolean mask over the feature columns: True where Lasso kept the variable.
mask = lasso.coef_ != 0
print('相关系数是否为零:', mask)

#%%
outputfile = './data/new_reg_data.csv'
# Apply the mask so the output really contains only the selected attributes
# (previously the mask was computed but the full table was written out).
# The grey-forecast step that reads this file looks columns up from a
# hard-coded list; keep that list in sync with the selection printed above.
new_reg_data = features.loc[:, mask]
print(new_reg_data)
# index=False: do not persist the row index, otherwise read_csv brings it back
# as a spurious 'Unnamed: 0' column.
new_reg_data.to_csv(outputfile, index=False)
print('输出数据的维度为:', new_reg_data.shape)
二、构建灰色预测模型并预测
#%%
# Step 2: extend each selected feature two years ahead (2017, 2018) with the
# grey GM(1,1) model, then attach the observed target column.
import numpy as np
import pandas as pd
from data.GM11 import GM11

#%%
inputfile1 = './data/new_reg_data.csv'
inputfile2 = './data/data.csv'
new_reg_data = pd.read_csv(inputfile1)
data = pd.read_csv(inputfile2)

# Label the observed rows by year and append empty rows for the forecast years.
new_reg_data.index = range(1997, 2017)
new_reg_data.loc[2017] = np.nan
new_reg_data.loc[2018] = np.nan

cols = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13']

# Fit on ALL observed years, 1997..2016 inclusive. The previous
# range(1997, 2016) stopped at 2015 and silently dropped the 2016 observation.
observed_years = range(1997, 2017)
n_rows = len(new_reg_data)  # observed rows plus the two appended forecast rows

for col in cols:
    # GM11 is a project helper; only its first return value is used, as a
    # callable taking a position in the series.
    # NOTE(review): presumably 1-based, so n_rows - 1 / n_rows address the
    # 2017 / 2018 rows - confirm against data/GM11.py.
    f = GM11(new_reg_data.loc[observed_years, col].values)[0]
    new_reg_data.loc[2017, col] = f(n_rows - 1)
    new_reg_data.loc[2018, col] = f(n_rows)
    new_reg_data[col] = new_reg_data[col].round(2)

# NOTE(review): writing the legacy .xls format needs an engine that recent
# pandas releases may no longer ship; the path is kept because the next step
# reads exactly this file.
outputfile = './data/new_reg_data_GM11.xls'

# The target is unknown for the two forecast years, so pad it with NaN.
y = list(data['y'].values)
y.extend([np.nan, np.nan])
new_reg_data['y'] = y
new_reg_data.to_excel(outputfile)

# Show the forecast rows; the old slice 2014:2016 printed observed years
# under the "prediction result" label.
print('预测结果为:\n', new_reg_data.loc[2017:2018, :])
三、构建支持向量回归预测模型
#%%
# Step 3: fit a linear support-vector regression on the observed years and use
# it to predict `y` for every row, including the GM(1,1)-extended years.
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVR
import pandas as pd
import numpy as np

#%%
inputfile = './data/new_reg_data_GM11.xls'
data = pd.read_excel(inputfile)
feature = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13']
data.index = range(1997, 2019)

# Train only on the years whose target is actually observed (1997..2016).
# The previous version replaced every NaN with 0 and trained on 1997..2018,
# i.e. it fitted the model on two forecast rows with a fabricated y = 0, which
# also distorted the mean/std used for standardisation. Unknown targets are
# now left as NaN instead of being reported as a "true" value of 0.
data_train = data.loc[1997:2016].copy()

# Standardise with statistics taken from the training rows only.
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean) / data_std
x_train = data_train[feature].values
y_train = data_train['y'].values

#%%
linearsvr = LinearSVR()
linearsvr.fit(x_train, y_train)

# Apply the training-set scaling to all rows, forecast years included.
x = ((data[feature] - data_mean[feature]) / data_std[feature]).values

#%%
# Undo the standardisation of y so predictions are in the original units.
data[u'y_pred'] = linearsvr.predict(x) * data_std['y'] + data_mean['y']
outputfile = './data/new_reg_data_GM11_revenue.xls'
data.to_excel(outputfile)

#%%
print('真实值与预测值分别为:\n', data[['y', 'y_pred']])

#%%
# Compare observed and fitted values over the observed years only.
data = data.loc[1997:2016, :]
print(data)
fig = data[['y', 'y_pred']].plot(style=['b-o', 'y-*'])
# Without an explicit show() the figure never appears when this runs as a
# plain script rather than in an interactive cell.
plt.show()
四、结果
标签:因素,预测,#%%,train,财政收入,import,new,data,reg From: https://www.cnblogs.com/zhilin00/p/17180866.html