标签:数据分析 1.00 Python NaN 0.99 财政收入 new data reg
# -*- coding: utf-8 -*-
# 代码6-1
import numpy as np
import pandas as pd
inputfile = '../data/data.csv' # 输入的数据文件
data = pd.read_csv(inputfile) # 读取数据
# 描述性统计分析
description = [data.min(), data.max(), data.mean(),
data.std()] # 依次计算最小值、最大值、均值、标准差
description = pd.DataFrame(
description, index=['Min', 'Max', 'Mean', 'STD']).T # 将结果存入数据框
print('描述性统计结果:\n', np.round(description, 2)) # 保留两位小数
描述性统计结果:
Min Max Mean STD
x1 3831732.00 7599295.00 5579519.95 1262194.72
x2 181.54 2110.78 765.04 595.70
x3 448.19 6882.85 2370.83 1919.17
x4 7571.00 42049.14 19644.69 10203.02
x5 6212.70 33156.83 15870.95 8199.77
x6 6370241.00 8323096.00 7350513.60 621341.85
x7 525.71 4454.55 1712.24 1184.71
x8 985.31 15420.14 5705.80 4478.40
x9 60.62 228.46 129.49 50.51
x10 65.66 852.56 340.22 251.58
x11 97.50 120.00 103.31 5.51
x12 1.03 1.91 1.42 0.25
x13 5321.00 41972.00 17273.80 11109.19
y 64.87 2088.14 618.08 609.25
# 代码6-2
# 相关性分析
corr = data.corr(method='pearson') # 计算相关系数矩阵
print('相关系数矩阵为:\n', np.round(corr, 2)) # 保留两位小数
相关系数矩阵为:
x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 \
x1 1.00 0.95 0.95 0.97 0.97 0.99 0.95 0.97 0.98 0.98 -0.29 0.94
x2 0.95 1.00 1.00 0.99 0.99 0.92 0.99 0.99 0.98 0.98 -0.13 0.89
x3 0.95 1.00 1.00 0.99 0.99 0.92 1.00 0.99 0.98 0.99 -0.15 0.89
x4 0.97 0.99 0.99 1.00 1.00 0.95 0.99 1.00 0.99 1.00 -0.19 0.91
x5 0.97 0.99 0.99 1.00 1.00 0.95 0.99 1.00 0.99 1.00 -0.18 0.90
x6 0.99 0.92 0.92 0.95 0.95 1.00 0.93 0.95 0.97 0.96 -0.34 0.95
x7 0.95 0.99 1.00 0.99 0.99 0.93 1.00 0.99 0.98 0.99 -0.15 0.89
x8 0.97 0.99 0.99 1.00 1.00 0.95 0.99 1.00 0.99 1.00 -0.15 0.90
x9 0.98 0.98 0.98 0.99 0.99 0.97 0.98 0.99 1.00 0.99 -0.23 0.91
x10 0.98 0.98 0.99 1.00 1.00 0.96 0.99 1.00 0.99 1.00 -0.17 0.90
x11 -0.29 -0.13 -0.15 -0.19 -0.18 -0.34 -0.15 -0.15 -0.23 -0.17 1.00 -0.43
x12 0.94 0.89 0.89 0.91 0.90 0.95 0.89 0.90 0.91 0.90 -0.43 1.00
x13 0.96 1.00 1.00 1.00 0.99 0.94 1.00 1.00 0.99 0.99 -0.16 0.90
y 0.94 0.98 0.99 0.99 0.99 0.91 0.99 0.99 0.98 0.99 -0.12 0.87
x13 y
x1 0.96 0.94
x2 1.00 0.98
x3 1.00 0.99
x4 1.00 0.99
x5 0.99 0.99
x6 0.94 0.91
x7 1.00 0.99
x8 1.00 0.99
x9 0.99 0.98
x10 0.99 0.99
x11 -0.16 -0.12
x12 0.90 0.87
x13 1.00 0.99
y 0.99 1.00
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['font.sans-serif'] = ['SimHei'] # 中文标签
plt.rcParams['axes.unicode_minus'] = False # 正常显示负号
plt.subplots(figsize=(12, 12))
sns.heatmap(corr, annot=True, vmax=1, square=True, cmap="Reds")
plt.title('相关性热力图(20信计1班李之琛3322)')
plt.savefig('../imag/hot.jpg', dpi=600)
plt.show()
# 代码6-3
# 绘制热力图
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['font.sans-serif'] = ['SimHei'] # 中文标签
plt.rcParams['axes.unicode_minus'] = False # 正常显示负号
plt.subplots(figsize=(10, 10)) # 设置画面大小
sns.heatmap(corr, annot=True, vmax=1, square=True, cmap="Blues")
plt.title('相关性热力图')
plt.show()
# plt.close
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
inputfile = '../data/data.csv' # 输入的数据文件
data = pd.read_csv(inputfile) # 读取数据
lasso = Lasso(1000) # 调用Lasso()函数,设置λ的值为1000
lasso.fit(data.iloc[:,0:13],data['y'])
print('相关系数为:',np.round(lasso.coef_,5)) # 输出结果,保留五位小数
相关系数为: [-1.8000e-04 -0.0000e+00 1.2414e-01 -1.0310e-02 6.5400e-02 1.2000e-04
3.1741e-01 3.4900e-02 -0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00
-4.0300e-02]
F:\Anaconda\envs\my\lib\site-packages\sklearn\linear_model\_coordinate_descent.py:648: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.417e+04, tolerance: 7.053e+02
model = cd_fast.enet_coordinate_descent(
lasso.coef_
array([-1.76147790e-04, -0.00000000e+00, 1.24143041e-01, -1.03120575e-02,
6.53999569e-02, 1.15234764e-04, 3.17411469e-01, 3.49002210e-02,
-0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
-4.02991430e-02])
print('相关系数非零个数为:',np.sum(lasso.coef_ != 0)) # 计算相关系数非零的个数
相关系数非零个数为: 8
mask = lasso.coef_ != 0 # 返回一个相关系数是否为零的布尔数组
print('相关系数是否为零:',mask)
相关系数是否为零: [ True False True True True True True True False False False False
True]
data.head(5)
|
x1 |
x2 |
x3 |
x4 |
x5 |
x6 |
x7 |
x8 |
x9 |
x10 |
x11 |
x12 |
x13 |
y |
0 |
3831732 |
181.54 |
448.19 |
7571.00 |
6212.70 |
6370241 |
525.71 |
985.31 |
60.62 |
65.66 |
120.0 |
1.029 |
5321 |
64.87 |
1 |
3913824 |
214.63 |
549.97 |
9038.16 |
7601.73 |
6467115 |
618.25 |
1259.20 |
73.46 |
95.46 |
113.5 |
1.051 |
6529 |
99.75 |
2 |
3928907 |
239.56 |
686.44 |
9905.31 |
8092.82 |
6560508 |
638.94 |
1468.06 |
81.16 |
81.16 |
108.2 |
1.064 |
7008 |
88.11 |
3 |
4282130 |
261.58 |
802.59 |
10444.60 |
8767.98 |
6664862 |
656.58 |
1678.12 |
85.72 |
91.70 |
102.2 |
1.092 |
7694 |
106.07 |
4 |
4453911 |
283.14 |
904.57 |
11255.70 |
9422.33 |
6741400 |
758.83 |
1893.52 |
88.88 |
114.61 |
97.7 |
1.200 |
8027 |
137.32 |
mask = np.append(mask,True)
mask
array([ True, False, True, True, True, True, True, True, False,
False, False, False, True, True])
outputfile ='../tmp/new_reg_data_2.csv' # 输出的数据文件
new_reg_data = data.iloc[:, mask] # 返回相关系数非零的数据
new_reg_data
|
x1 |
x3 |
x4 |
x5 |
x6 |
x7 |
x8 |
x13 |
y |
0 |
3831732 |
448.19 |
7571.00 |
6212.70 |
6370241 |
525.71 |
985.31 |
5321 |
64.87 |
1 |
3913824 |
549.97 |
9038.16 |
7601.73 |
6467115 |
618.25 |
1259.20 |
6529 |
99.75 |
2 |
3928907 |
686.44 |
9905.31 |
8092.82 |
6560508 |
638.94 |
1468.06 |
7008 |
88.11 |
3 |
4282130 |
802.59 |
10444.60 |
8767.98 |
6664862 |
656.58 |
1678.12 |
7694 |
106.07 |
4 |
4453911 |
904.57 |
11255.70 |
9422.33 |
6741400 |
758.83 |
1893.52 |
8027 |
137.32 |
5 |
4548852 |
1000.69 |
12018.52 |
9751.44 |
6850024 |
878.26 |
2139.18 |
8549 |
188.14 |
6 |
4962579 |
1121.13 |
13966.53 |
11349.47 |
7006896 |
923.67 |
2492.74 |
9566 |
219.91 |
7 |
5029338 |
1248.29 |
14694.00 |
11467.35 |
7125979 |
978.21 |
2841.65 |
10473 |
271.91 |
8 |
5070216 |
1370.68 |
13380.47 |
10671.78 |
7206229 |
1009.24 |
3203.96 |
11469 |
269.10 |
9 |
5210706 |
1494.27 |
15002.59 |
11570.58 |
7251888 |
1175.17 |
3758.62 |
12360 |
300.55 |
10 |
5407087 |
1677.77 |
16884.16 |
13120.83 |
7376720 |
1348.93 |
4450.55 |
14174 |
338.45 |
11 |
5744550 |
1905.84 |
18287.24 |
14468.24 |
7505322 |
1519.16 |
5154.23 |
16394 |
408.86 |
12 |
5994973 |
2199.14 |
19850.66 |
15444.93 |
7607220 |
1696.38 |
6081.86 |
17881 |
476.72 |
13 |
6236312 |
2624.24 |
22469.22 |
18951.32 |
7734787 |
1863.34 |
7140.32 |
20058 |
838.99 |
14 |
6529045 |
3187.39 |
25316.72 |
20835.95 |
7841695 |
2105.54 |
8287.38 |
22114 |
843.14 |
15 |
6791495 |
3615.77 |
27609.59 |
22820.89 |
7946154 |
2659.85 |
9138.21 |
24190 |
1107.67 |
16 |
7110695 |
4476.38 |
30658.49 |
25011.61 |
8061370 |
3263.57 |
10748.28 |
29549 |
1399.16 |
17 |
7431755 |
5243.03 |
34438.08 |
28209.74 |
8145797 |
3412.21 |
12423.44 |
34214 |
1535.14 |
18 |
7512997 |
5977.27 |
38053.52 |
30490.44 |
8222969 |
3758.39 |
13551.21 |
37934 |
1579.68 |
19 |
7599295 |
6882.85 |
42049.14 |
33156.83 |
8323096 |
4454.55 |
15420.14 |
41972 |
2088.14 |
new_reg_data = new_reg_data.iloc[:,:-1]
new_reg_data
|
x1 |
x3 |
x4 |
x5 |
x6 |
x7 |
x8 |
x13 |
0 |
3831732 |
448.19 |
7571.00 |
6212.70 |
6370241 |
525.71 |
985.31 |
5321 |
1 |
3913824 |
549.97 |
9038.16 |
7601.73 |
6467115 |
618.25 |
1259.20 |
6529 |
2 |
3928907 |
686.44 |
9905.31 |
8092.82 |
6560508 |
638.94 |
1468.06 |
7008 |
3 |
4282130 |
802.59 |
10444.60 |
8767.98 |
6664862 |
656.58 |
1678.12 |
7694 |
4 |
4453911 |
904.57 |
11255.70 |
9422.33 |
6741400 |
758.83 |
1893.52 |
8027 |
5 |
4548852 |
1000.69 |
12018.52 |
9751.44 |
6850024 |
878.26 |
2139.18 |
8549 |
6 |
4962579 |
1121.13 |
13966.53 |
11349.47 |
7006896 |
923.67 |
2492.74 |
9566 |
7 |
5029338 |
1248.29 |
14694.00 |
11467.35 |
7125979 |
978.21 |
2841.65 |
10473 |
8 |
5070216 |
1370.68 |
13380.47 |
10671.78 |
7206229 |
1009.24 |
3203.96 |
11469 |
9 |
5210706 |
1494.27 |
15002.59 |
11570.58 |
7251888 |
1175.17 |
3758.62 |
12360 |
10 |
5407087 |
1677.77 |
16884.16 |
13120.83 |
7376720 |
1348.93 |
4450.55 |
14174 |
11 |
5744550 |
1905.84 |
18287.24 |
14468.24 |
7505322 |
1519.16 |
5154.23 |
16394 |
12 |
5994973 |
2199.14 |
19850.66 |
15444.93 |
7607220 |
1696.38 |
6081.86 |
17881 |
13 |
6236312 |
2624.24 |
22469.22 |
18951.32 |
7734787 |
1863.34 |
7140.32 |
20058 |
14 |
6529045 |
3187.39 |
25316.72 |
20835.95 |
7841695 |
2105.54 |
8287.38 |
22114 |
15 |
6791495 |
3615.77 |
27609.59 |
22820.89 |
7946154 |
2659.85 |
9138.21 |
24190 |
16 |
7110695 |
4476.38 |
30658.49 |
25011.61 |
8061370 |
3263.57 |
10748.28 |
29549 |
17 |
7431755 |
5243.03 |
34438.08 |
28209.74 |
8145797 |
3412.21 |
12423.44 |
34214 |
18 |
7512997 |
5977.27 |
38053.52 |
30490.44 |
8222969 |
3758.39 |
13551.21 |
37934 |
19 |
7599295 |
6882.85 |
42049.14 |
33156.83 |
8323096 |
4454.55 |
15420.14 |
41972 |
new_reg_data.to_csv(outputfile) # 存储数据
print('输出数据的维度为:',new_reg_data.shape) # 查看输出数据的维度
输出数据的维度为: (20, 8)
#-*- coding: utf-8 -*-
def GM11(x0): #自定义灰色预测函数
import numpy as np
x1 = x0.cumsum() #1-AGO序列
z1 = (x1[:len(x1)-1] + x1[1:])/2.0 #紧邻均值(MEAN)生成序列
z1 = z1.reshape((len(z1),1))
B = np.append(-z1, np.ones_like(z1), axis = 1)
Yn = x0[1:].reshape((len(x0)-1, 1))
[[a],[b]] = np.dot(np.dot(np.linalg.inv(np.dot(B.T, B)), B.T), Yn) #计算参数
f = lambda k: (x0[0]-b/a)*np.exp(-a*(k-1))-(x0[0]-b/a)*np.exp(-a*(k-2)) #还原值
delta = np.abs(x0 - np.array([f(i) for i in range(1,len(x0)+1)]))
C = delta.std()/x0.std()
P = 1.0*(np.abs(delta - delta.mean()) < 0.6745*x0.std()).sum()/len(x0)
return f, a, b, x0[0], C, P #返回灰色预测函数、a、b、首项、方差比、小残差概率
# 代码6-5
# import sys
# sys.path.append('../code') # 设置路径
import numpy as np
import pandas as pd
# from GM11 import GM11 # 引入自编的灰色预测函数
inputfile1 = '../tmp/new_reg_data_2.csv' # 输入的数据文件
inputfile2 = '../data/data.csv' # 输入的数据文件
new_reg_data = pd.read_csv(inputfile1) # 读取经过特征选择后的数据
data = pd.read_csv(inputfile2) # 读取总的数据
new_reg_data.index = range(1994, 2014)
new_reg_data.loc[2014] = None
new_reg_data.loc[2015] = None
l = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13']
C:\Windows\Temp\ipykernel_63584\756966604.py:6: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
new_reg_data.loc[2014] = None
C:\Windows\Temp\ipykernel_63584\756966604.py:7: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
new_reg_data.loc[2015] = None
new_reg_data
|
Unnamed: 0 |
x1 |
x3 |
x4 |
x5 |
x6 |
x7 |
x8 |
x13 |
1994 |
0.0 |
3831732.0 |
448.19 |
7571.00 |
6212.70 |
6370241.0 |
525.71 |
985.31 |
5321.0 |
1995 |
1.0 |
3913824.0 |
549.97 |
9038.16 |
7601.73 |
6467115.0 |
618.25 |
1259.20 |
6529.0 |
1996 |
2.0 |
3928907.0 |
686.44 |
9905.31 |
8092.82 |
6560508.0 |
638.94 |
1468.06 |
7008.0 |
1997 |
3.0 |
4282130.0 |
802.59 |
10444.60 |
8767.98 |
6664862.0 |
656.58 |
1678.12 |
7694.0 |
1998 |
4.0 |
4453911.0 |
904.57 |
11255.70 |
9422.33 |
6741400.0 |
758.83 |
1893.52 |
8027.0 |
1999 |
5.0 |
4548852.0 |
1000.69 |
12018.52 |
9751.44 |
6850024.0 |
878.26 |
2139.18 |
8549.0 |
2000 |
6.0 |
4962579.0 |
1121.13 |
13966.53 |
11349.47 |
7006896.0 |
923.67 |
2492.74 |
9566.0 |
2001 |
7.0 |
5029338.0 |
1248.29 |
14694.00 |
11467.35 |
7125979.0 |
978.21 |
2841.65 |
10473.0 |
2002 |
8.0 |
5070216.0 |
1370.68 |
13380.47 |
10671.78 |
7206229.0 |
1009.24 |
3203.96 |
11469.0 |
2003 |
9.0 |
5210706.0 |
1494.27 |
15002.59 |
11570.58 |
7251888.0 |
1175.17 |
3758.62 |
12360.0 |
2004 |
10.0 |
5407087.0 |
1677.77 |
16884.16 |
13120.83 |
7376720.0 |
1348.93 |
4450.55 |
14174.0 |
2005 |
11.0 |
5744550.0 |
1905.84 |
18287.24 |
14468.24 |
7505322.0 |
1519.16 |
5154.23 |
16394.0 |
2006 |
12.0 |
5994973.0 |
2199.14 |
19850.66 |
15444.93 |
7607220.0 |
1696.38 |
6081.86 |
17881.0 |
2007 |
13.0 |
6236312.0 |
2624.24 |
22469.22 |
18951.32 |
7734787.0 |
1863.34 |
7140.32 |
20058.0 |
2008 |
14.0 |
6529045.0 |
3187.39 |
25316.72 |
20835.95 |
7841695.0 |
2105.54 |
8287.38 |
22114.0 |
2009 |
15.0 |
6791495.0 |
3615.77 |
27609.59 |
22820.89 |
7946154.0 |
2659.85 |
9138.21 |
24190.0 |
2010 |
16.0 |
7110695.0 |
4476.38 |
30658.49 |
25011.61 |
8061370.0 |
3263.57 |
10748.28 |
29549.0 |
2011 |
17.0 |
7431755.0 |
5243.03 |
34438.08 |
28209.74 |
8145797.0 |
3412.21 |
12423.44 |
34214.0 |
2012 |
18.0 |
7512997.0 |
5977.27 |
38053.52 |
30490.44 |
8222969.0 |
3758.39 |
13551.21 |
37934.0 |
2013 |
19.0 |
7599295.0 |
6882.85 |
42049.14 |
33156.83 |
8323096.0 |
4454.55 |
15420.14 |
41972.0 |
2014 |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
2015 |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
import xlwt
for i in l:
f = GM11(new_reg_data.loc[range(1994, 2014),i].values)[0]
new_reg_data.loc[2014,i] = f(len(new_reg_data)-1) # 2014年预测结果
new_reg_data.loc[2015,i] = f(len(new_reg_data)) # 2015年预测结果
new_reg_data[i] = new_reg_data[i].round(2) # 保留两位小数
outputfile = '../tmp/new_reg_data_GM11_2.xls' # 灰色预测后保存的路径
y = list(data['y'].values) # 提取财政收入列,合并至新数据框中
y.extend([np.nan,np.nan])
new_reg_data['y'] = y
new_reg_data.to_excel(outputfile) # 结果输出
print('预测结果为:\n',new_reg_data.loc[2014:2015,:]) # 预测结果展示
预测结果为:
Unnamed: 0 x1 x3 x4 x5 x6 \
2014 NaN 8142148.24 7042.31 43611.84 35046.63 8505522.58
2015 NaN 8460489.28 8166.92 47792.22 38384.22 8627139.31
x7 x8 x13 y
2014 4600.40 18686.28 44506.47 NaN
2015 5214.78 21474.47 49945.88 NaN
C:\Windows\Temp\ipykernel_63584\1948140384.py:10: FutureWarning: As the xlwt package is no longer maintained, the xlwt engine will be removed in a future version of pandas. This is the only engine in pandas that supports writing in the xls format. Install openpyxl and write to an xlsx file instead. You can set the option io.excel.xls.writer to 'xlwt' to silence this warning. While this option is deprecated and will also raise a warning, it can be globally set and the warning suppressed.
new_reg_data.to_excel(outputfile) # 结果输出
# 代码6-6
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVR
inputfile = '../tmp/new_reg_data_GM11_2.xls' # 灰色预测后保存的路径
data = pd.read_excel(inputfile) # 读取数据
feature = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13'] # 属性所在列
data
|
Unnamed: 0.1 |
Unnamed: 0 |
x1 |
x3 |
x4 |
x5 |
x6 |
x7 |
x8 |
x13 |
y |
0 |
1994 |
0.0 |
3831732.00 |
448.19 |
7571.00 |
6212.70 |
6370241.00 |
525.71 |
985.31 |
5321.00 |
64.87 |
1 |
1995 |
1.0 |
3913824.00 |
549.97 |
9038.16 |
7601.73 |
6467115.00 |
618.25 |
1259.20 |
6529.00 |
99.75 |
2 |
1996 |
2.0 |
3928907.00 |
686.44 |
9905.31 |
8092.82 |
6560508.00 |
638.94 |
1468.06 |
7008.00 |
88.11 |
3 |
1997 |
3.0 |
4282130.00 |
802.59 |
10444.60 |
8767.98 |
6664862.00 |
656.58 |
1678.12 |
7694.00 |
106.07 |
4 |
1998 |
4.0 |
4453911.00 |
904.57 |
11255.70 |
9422.33 |
6741400.00 |
758.83 |
1893.52 |
8027.00 |
137.32 |
5 |
1999 |
5.0 |
4548852.00 |
1000.69 |
12018.52 |
9751.44 |
6850024.00 |
878.26 |
2139.18 |
8549.00 |
188.14 |
6 |
2000 |
6.0 |
4962579.00 |
1121.13 |
13966.53 |
11349.47 |
7006896.00 |
923.67 |
2492.74 |
9566.00 |
219.91 |
7 |
2001 |
7.0 |
5029338.00 |
1248.29 |
14694.00 |
11467.35 |
7125979.00 |
978.21 |
2841.65 |
10473.00 |
271.91 |
8 |
2002 |
8.0 |
5070216.00 |
1370.68 |
13380.47 |
10671.78 |
7206229.00 |
1009.24 |
3203.96 |
11469.00 |
269.10 |
9 |
2003 |
9.0 |
5210706.00 |
1494.27 |
15002.59 |
11570.58 |
7251888.00 |
1175.17 |
3758.62 |
12360.00 |
300.55 |
10 |
2004 |
10.0 |
5407087.00 |
1677.77 |
16884.16 |
13120.83 |
7376720.00 |
1348.93 |
4450.55 |
14174.00 |
338.45 |
11 |
2005 |
11.0 |
5744550.00 |
1905.84 |
18287.24 |
14468.24 |
7505322.00 |
1519.16 |
5154.23 |
16394.00 |
408.86 |
12 |
2006 |
12.0 |
5994973.00 |
2199.14 |
19850.66 |
15444.93 |
7607220.00 |
1696.38 |
6081.86 |
17881.00 |
476.72 |
13 |
2007 |
13.0 |
6236312.00 |
2624.24 |
22469.22 |
18951.32 |
7734787.00 |
1863.34 |
7140.32 |
20058.00 |
838.99 |
14 |
2008 |
14.0 |
6529045.00 |
3187.39 |
25316.72 |
20835.95 |
7841695.00 |
2105.54 |
8287.38 |
22114.00 |
843.14 |
15 |
2009 |
15.0 |
6791495.00 |
3615.77 |
27609.59 |
22820.89 |
7946154.00 |
2659.85 |
9138.21 |
24190.00 |
1107.67 |
16 |
2010 |
16.0 |
7110695.00 |
4476.38 |
30658.49 |
25011.61 |
8061370.00 |
3263.57 |
10748.28 |
29549.00 |
1399.16 |
17 |
2011 |
17.0 |
7431755.00 |
5243.03 |
34438.08 |
28209.74 |
8145797.00 |
3412.21 |
12423.44 |
34214.00 |
1535.14 |
18 |
2012 |
18.0 |
7512997.00 |
5977.27 |
38053.52 |
30490.44 |
8222969.00 |
3758.39 |
13551.21 |
37934.00 |
1579.68 |
19 |
2013 |
19.0 |
7599295.00 |
6882.85 |
42049.14 |
33156.83 |
8323096.00 |
4454.55 |
15420.14 |
41972.00 |
2088.14 |
20 |
2014 |
NaN |
8142148.24 |
7042.31 |
43611.84 |
35046.63 |
8505522.58 |
4600.40 |
18686.28 |
44506.47 |
NaN |
21 |
2015 |
NaN |
8460489.28 |
8166.92 |
47792.22 |
38384.22 |
8627139.31 |
5214.78 |
21474.47 |
49945.88 |
NaN |
data_train=data.loc[0:19,:]
data_train.head()
|
Unnamed: 0.1 |
Unnamed: 0 |
x1 |
x3 |
x4 |
x5 |
x6 |
x7 |
x8 |
x13 |
y |
0 |
1994 |
0.0 |
3831732.0 |
448.19 |
7571.00 |
6212.70 |
6370241.0 |
525.71 |
985.31 |
5321.0 |
64.87 |
1 |
1995 |
1.0 |
3913824.0 |
549.97 |
9038.16 |
7601.73 |
6467115.0 |
618.25 |
1259.20 |
6529.0 |
99.75 |
2 |
1996 |
2.0 |
3928907.0 |
686.44 |
9905.31 |
8092.82 |
6560508.0 |
638.94 |
1468.06 |
7008.0 |
88.11 |
3 |
1997 |
3.0 |
4282130.0 |
802.59 |
10444.60 |
8767.98 |
6664862.0 |
656.58 |
1678.12 |
7694.0 |
106.07 |
4 |
1998 |
4.0 |
4453911.0 |
904.57 |
11255.70 |
9422.33 |
6741400.0 |
758.83 |
1893.52 |
8027.0 |
137.32 |
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean)/data_std # 数据标准化
x_train = data_train[feature].values # 属性数据
y_train = data_train['y'].values # 标签数据
data_train[feature]
|
x1 |
x3 |
x4 |
x5 |
x6 |
x7 |
x8 |
x13 |
0 |
-1.384721 |
-1.001807 |
-1.183344 |
-1.177868 |
-1.577670 |
-1.001532 |
-1.054057 |
-1.075938 |
1 |
-1.319682 |
-0.948774 |
-1.039547 |
-1.008469 |
-1.421759 |
-0.923420 |
-0.992899 |
-0.967199 |
2 |
-1.307732 |
-0.877665 |
-0.954558 |
-0.948579 |
-1.271451 |
-0.905956 |
-0.946262 |
-0.924082 |
3 |
-1.027884 |
-0.817144 |
-0.901702 |
-0.866240 |
-1.103501 |
-0.891067 |
-0.899357 |
-0.862331 |
4 |
-0.891787 |
-0.764006 |
-0.822206 |
-0.786439 |
-0.980320 |
-0.804759 |
-0.851259 |
-0.832356 |
5 |
-0.816568 |
-0.713922 |
-0.747442 |
-0.746302 |
-0.805498 |
-0.703950 |
-0.796405 |
-0.785368 |
6 |
-0.488784 |
-0.651165 |
-0.556517 |
-0.551415 |
-0.553025 |
-0.665620 |
-0.717457 |
-0.693822 |
7 |
-0.435893 |
-0.584907 |
-0.485218 |
-0.537039 |
-0.361370 |
-0.619583 |
-0.639547 |
-0.612178 |
8 |
-0.403507 |
-0.521135 |
-0.613957 |
-0.634063 |
-0.232215 |
-0.593391 |
-0.558646 |
-0.522522 |
9 |
-0.292201 |
-0.456737 |
-0.454973 |
-0.524450 |
-0.158730 |
-0.453332 |
-0.434793 |
-0.442319 |
10 |
-0.136614 |
-0.361123 |
-0.270560 |
-0.335390 |
0.042177 |
-0.306664 |
-0.280290 |
-0.279030 |
11 |
0.130748 |
-0.242285 |
-0.133043 |
-0.171067 |
0.249152 |
-0.162975 |
-0.123162 |
-0.079196 |
12 |
0.329151 |
-0.089458 |
0.020188 |
-0.051955 |
0.413148 |
-0.013386 |
0.083972 |
0.054657 |
13 |
0.520357 |
0.132044 |
0.276833 |
0.375666 |
0.618457 |
0.127542 |
0.320320 |
0.250621 |
14 |
0.752281 |
0.425479 |
0.555917 |
0.605505 |
0.790517 |
0.331980 |
0.576452 |
0.435693 |
15 |
0.960212 |
0.648690 |
0.780642 |
0.847578 |
0.958636 |
0.799865 |
0.766437 |
0.622566 |
16 |
1.213105 |
1.097119 |
1.079465 |
1.114746 |
1.144067 |
1.309456 |
1.125956 |
1.104959 |
17 |
1.467472 |
1.496590 |
1.449903 |
1.504773 |
1.279945 |
1.434921 |
1.500009 |
1.524882 |
18 |
1.531837 |
1.879172 |
1.804253 |
1.782915 |
1.404147 |
1.727127 |
1.751833 |
1.859740 |
19 |
1.600209 |
2.351033 |
2.195865 |
2.108093 |
1.565294 |
2.314745 |
2.169154 |
2.223223 |
linearsvr = LinearSVR() # 调用LinearSVR()函数
linearsvr.fit(x_train,y_train)
x = ((data[feature] - data_mean[feature])/data_std[feature]).values # 预测,并还原结果。
data['y_pred'] = linearsvr.predict(x) * data_std['y'] + data_mean['y']
outputfile = '../tmp/new_reg_data_GM11_revenue_2.xls' # SVR预测后保存的结果
data.to_excel(outputfile)
print('真实值与预测值分别为:\n',data[['y','y_pred']])
fig = data[['y','y_pred']].plot(subplots = True, style=['b-o','r-*']) # 画出预测结果图
plt.show()
F:\Anaconda\envs\my\lib\site-packages\sklearn\svm\_base.py:1225: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
warnings.warn(
C:\Windows\Temp\ipykernel_63584\2363168573.py:6: FutureWarning: As the xlwt package is no longer maintained, the xlwt engine will be removed in a future version of pandas. This is the only engine in pandas that supports writing in the xls format. Install openpyxl and write to an xlsx file instead. You can set the option io.excel.xls.writer to 'xlwt' to silence this warning. While this option is deprecated and will also raise a warning, it can be globally set and the warning suppressed.
data.to_excel(outputfile)
真实值与预测值分别为:
y y_pred
0 64.87 37.302597
1 99.75 83.928503
2 88.11 94.774134
3 106.07 106.434331
4 137.32 151.059897
5 188.14 188.205356
6 219.91 219.566685
7 271.91 230.340346
8 269.10 219.659959
9 300.55 300.550000
10 338.45 383.515443
11 408.86 463.222420
12 476.72 554.905404
13 838.99 691.434167
14 843.14 843.095183
15 1107.67 1088.163763
16 1399.16 1379.593271
17 1535.14 1536.823098
18 1579.68 1739.224541
19 2088.14 2086.022177
20 NaN 2188.120305
21 NaN 2539.228245
标签:数据分析,
1.00,
Python,
NaN,
0.99,
财政收入,
new,
data,
reg
From: https://www.cnblogs.com/Reion/p/17181371.html