import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ---- Step 1: descriptive statistics and Pearson correlation heatmap ----

# Raw string so the backslash in the Windows path is never read as an escape.
inputfile = r'D:\data.csv'
data = pd.read_csv(inputfile)

# Per-column minimum, maximum, mean and standard deviation. The four
# reductions are stacked as rows and then transposed, so the result has one
# row per input column and one column per statistic.
description = pd.DataFrame(
    [data.min(), data.max(), data.mean(), data.std()],
    index=['Min', 'Max', 'Mean', 'STD'],
).T
print('描述性统计结果:\n', np.round(description, 2))  # rounded to two decimals

# Pairwise Pearson correlation between all columns.
corr = data.corr(method='pearson')
print('相关系数矩阵为:\n', np.round(corr, 2))

# Font options are set before the figure is built: SimHei for the Chinese
# title text, and unicode_minus disabled for the minus sign.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

plt.subplots(figsize=(10, 10))  # canvas size
sns.heatmap(corr, annot=True, vmax=1, square=True, cmap="Blues")
plt.title('相关性热力图(学号3146)')
plt.show()
# The original wrote `plt.close` without parentheses, which only references
# the function and never closes the figure.
plt.close()
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso

# ---- Step 2: feature selection with Lasso regression ----

# Raw strings: in a normal literal the "\n" inside "D:\new_reg_data.csv" is a
# newline escape, which silently corrupts the path.
inputfile = r'D:\data.csv'
data = pd.read_csv(inputfile)

# Fit on the first 13 columns as features with column 'y' as the target.
lasso = Lasso(alpha=1000)
lasso.fit(data.iloc[:, 0:13], data['y'])
print('相关系数为:', np.round(lasso.coef_, 5))
print('相关非零个数为:', np.sum(lasso.coef_ != 0))

# True where the fitted coefficient is non-zero, i.e. the feature is kept.
mask = lasso.coef_ != 0
print('相关系数是否为零:', mask)

# Extend the 13-entry mask with one extra True so the column that follows the
# features is also kept.
# NOTE(review): presumably that 14th column is 'y' and the file has exactly
# 14 columns -- confirm against the data file.
mask = np.append(mask, True)
print('相关系数是否为零:', mask)

outputfile = r'D:\new_reg_data.csv'
new_reg_data = data.iloc[:, mask]
new_reg_data.to_csv(outputfile)
print('输出数据的维度为:', new_reg_data.shape)
import sys

# ---- Step 3: grey-model (GM11) forecast of the selected features ----

# Make the folder holding the GM11 module importable. This must run before
# the `from GM11 import GM11` line below.
sys.path.append(r'D:\shujuwajue')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # imported here so this step does not rely on an earlier one
from GM11 import GM11  # project-local grey prediction function

# Font options for Chinese text and the minus sign in figures.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Raw strings: "\n" in "D:\new_reg_data..." would otherwise be a newline.
inputfile1 = r'D:\new_reg_data.csv'  # data after feature selection
inputfile2 = r'D:\data.csv'          # full data set
new_reg_data = pd.read_csv(inputfile1)
data = pd.read_csv(inputfile2)

# Label the 20 rows with the years 1994..2013, then append empty rows for
# 2014-2016. A single reindex replaces three `.loc[year] = None` enlargements
# and fills the new rows with NaN, so the columns stay numeric and can be
# rounded below.
new_reg_data.index = range(1994, 2014)
new_reg_data = new_reg_data.reindex(range(1994, 2017))

feature_cols = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13']
n_rows = len(new_reg_data)  # loop-invariant: 20 observed years + 3 forecast years

for col in feature_cols:
    # GM11 is given the 1994-2013 values; the first element of its return
    # value is called below with a period number.
    # NOTE(review): presumably that element is the fitted prediction function
    # taking a 1-based period index -- confirm against the GM11 module.
    f = GM11(new_reg_data.loc[range(1994, 2014), col].to_numpy())[0]
    new_reg_data.loc[2014, col] = f(n_rows - 2)  # forecast for 2014
    new_reg_data.loc[2015, col] = f(n_rows - 1)  # forecast for 2015
    new_reg_data.loc[2016, col] = f(n_rows)      # forecast for 2016
    new_reg_data[col] = new_reg_data[col].round(2)  # keep two decimals

# Where the grey-forecast result is saved.
# NOTE(review): writing the legacy .xls format depends on the Excel engine
# available in the installed pandas -- confirm it is supported.
outputfile = r'D:\new_reg_data_GM11.xls'

# Take column 'y' from the full data and pad it with NaN for the three
# forecast years so it lines up with the extended frame.
y = list(data['y'].values)
y.extend([np.nan, np.nan, np.nan])
new_reg_data['y'] = y

new_reg_data.to_excel(outputfile)
print('预测结果为:\n', new_reg_data.loc[2014:2016, :])  # show the forecast rows
import matplotlib.pyplot as plt
import pandas as pd  # imported here so this step does not rely on an earlier one
from sklearn.svm import LinearSVR

# ---- Step 4: LinearSVR regression on the grey-forecast features ----

# Raw strings: "\n" in "D:\new_reg_data..." would otherwise be a newline.
inputfile = r'D:\new_reg_data_GM11.xls'  # output of the grey-forecast step
data = pd.read_excel(inputfile)

feature = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13']  # feature columns

# The first 20 rows are the training set (the original comment describes
# them as the data before 2014).
data_train = data.iloc[0:20, :].copy()

# Standardise with the training mean and standard deviation.
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean) / data_std

x_train = data_train[feature].to_numpy()  # features
y_train = data_train['y'].to_numpy()      # target

linearsvr = LinearSVR()
linearsvr.fit(x_train, y_train)

# Standardise every row (training and forecast years) with the training
# statistics, predict, then map the prediction back to the original scale
# of 'y'.
x = ((data[feature] - data_mean[feature]) / data_std[feature]).to_numpy()
data['y_pred'] = linearsvr.predict(x) * data_std['y'] + data_mean['y']

# Where the SVR predictions are saved.
outputfile = r'D:\new_reg_data_GM11_revenue.xls'
data.to_excel(outputfile)

print('真实值与预测值分别为:\n', data[['y', 'y_pred']])

# Plot actual and predicted values in separate subplots.
fig = data[['y', 'y_pred']].plot(subplots=True, style=['b-o', 'r-*'])
# The original opened this string with a typographic quote (U+2018), which
# is a syntax error; a plain ASCII quote is used instead.
plt.title('3146')
plt.show()
# Tags: factors, plt, prediction, train, fiscal revenue, import, new, data, reg
# From: https://www.cnblogs.com/shizihao/p/17181789.html