代码:
clc;
% Load the raw table. cell2mat is applied to its columns below, so TC is
% used as a cell array; ProblemCDataWordle itself is defined outside this
% file (presumably an imported workspace variable -- TODO confirm).
TC=ProblemCDataWordle;

% ---- Data preparation ----
noC=TC(:,1);        % column 1: contest number
wordC=TC(:,2);      % column 2: solution word
dataC=TC(:,3:11);   % columns 3-11: numeric fields
no=cell2mat(noC);
data=cell2mat(dataC);
L=size(wordC,1);    % number of rows in the table

% The original table contains errors; these two words were corrected
% according to data found online.
wordC{36}="clean" ;
wordC{247}="trash";
% End of corrections.

% Encode the first five characters of every word as one number, two
% decimal digits per character (offset from 'a': 'a' -> 01, 'b' -> 02, ...).
% Preallocated instead of growing the vector on every iteration.
word=zeros(L,1);
for i=1:L
    str=char(wordC{i});
    num=0;
    for j=1:5
        num=num*100+(str(j)-'a'+1);
    end
    word(i)=num;
end

% Ratio of numeric column 2 to numeric column 1
% (presumably the hard-mode share of players -- TODO confirm).
Hp=data(:,2)./data(:,1);
Players=data(:,1);
% Numeric columns 3-7 (presumably part of the try-count distribution --
% TODO confirm against the table header).
ana=data(:,3:7);
% ---- Data preparation finished ----
plot(no,Players);

% Candidate regressors for stepwise regression. Columns of X:
%   1..6  : no, no^2, no^3, no^4, no^5, no^6
%   7, 8  : exp(no), exp(no^2)
%   9, 10 : log(no), log(no)^2
%   11    : exp(no)*log(no)
% Powers are built by successive multiplication, which is the same
% left-to-right product the expression no.*no.*...*no evaluates.
% Note: exp(no.^2) overflows to Inf in double precision once no >= 27.
noP2=no.*no;
noP3=noP2.*no;
noP4=noP3.*no;
noP5=noP4.*no;
noP6=noP5.*no;
X=[no noP2 noP3 noP4 noP5 noP6 ...
    exp(no) exp(noP2) log(no) log(no).*log(no) ...
    exp(no).*log(no)];
y=Players;
%stepwise (X,y);
% R^2 = 0.985990 (about 0.986), adjusted R^2 = 0.985711; the F-test is passed comfortably.
% x1 x2 x3 x4 x5 x9 x10 all pass the p-value test very strongly.
% Fitted model (same coefficients as fun1 in this file):
% y = -2.98444e+10 + 54082223.7047499*no - 70804.0624355989*no.^2 + 81.2251478254803*no.^3
%     - 0.0581796757825891*no.^4 + 1.87254597040691e-05*no.^5 + 14792650464.3214*log(no)
%     - 2033287277.42065*log(no).*log(no)
close all;

% Fitted curve against the observed player counts.
gy=fun1(no);
plot(no,gy);
hold on;
scatter(no,Players);
close all;

% Relative error of the fit at every point. This must be element-wise
% division: with two column vectors, (gy-y)/y is a matrix right division
% that yields an L-by-L matrix rather than one ratio per observation.
scatter(no,(gy-y)./y);
% Residuals of the fit, in reversed row order.
% NOTE(review): the x-axis 202:201+L used below suggests that, after the
% flip, e(i) belongs to contest 201+i (i.e. the table is stored newest
% first) -- TODO confirm against the data.
e=flip(gy-y);

% For every start index i, summarise the residuals e(i:L):
%   ebar(i) - mean absolute residual
%   ds(i)   - root-mean-square residual
% Both vectors are preallocated instead of being grown inside the loop.
ds=zeros(L,1);
ebar=zeros(L,1);
for i=1:L
    de=e(i:L,1);
    tailLen=L-i+1;                     % number of residuals in e(i:L)
    ebar(i)=sum(abs(de))/tailLen;
    ds(i)=sqrt(sum(de.*de)/tailLen);
end
hold on;
close all;
scatter(202:201+L,ds);
close all;
%scatter(202:201+L,ebar);
% Spread over the whole range: 10521; over the second half: 2735.81.
% (The original note calls these "variance"; they appear to be values of
% ds, which is a root-mean-square residual -- TODO confirm.)
%
% Keep only the regressors that survived the first stepwise run
% (columns 1-5, 9 and 10 of X) and run stepwise again on them.
newX = X(:, [1:5 9 10]);
stepwise(newX, y);
%model=fitlm(newX,y)
% ci=coefCI(model)
% It is unclear why fitlm gives different results from stepwise; left as is,
% since stepwise also reports confidence intervals.
%
% Confidence intervals reported by stepwise:
% 45542572.6802495 62621874.7292504
% -83037.1825962702 -58570.9422749276
% 65.9332107876828 96.5170848632777
% -0.0700610194043316 -0.0462983321608466
% 1.45963099050699e-05 2.28546095030684e-05
% 12719524815.9227 16865776112.7201
% -2325793821.44112 -1740780733.40019
% On investigation these turned out to be 95% confidence intervals of the
% regression coefficients, not of the function value.
%fun1(540)=20838
% The degrees of freedom are n - p - 1, where n is the sample size and p the
% number of regressors. They are therefore very large, so the t distribution
% is approximately the standard normal distribution.
% The standard error is the standard deviation of the residuals; the upper
% and lower band is the corresponding quantile times the standard error.
% The 95% confidence level corresponds to the normal quantile 1.96, giving
% margins of 20621.16 and 5362.1876 -- alarmingly wide.
% The 80% confidence level corresponds to 1.28 -- still far too wide.
% So build the band from the second half only, where the standard
% deviation is approximately 2735.81.
% Hence, at the 95% confidence level: [15476, 26200], with midpoint 20838.
scatter(no, (gy-y)./y);
% NOTE(review): this averages SIGNED relative errors, so positive and
% negative deviations cancel; it measures bias rather than error size.
% Left unchanged because the constant 0.02378 used below was derived from it.
sum((gy-y)./y)/L
% The error rate is 0.0237816360614148; better to report 20838 +/- 2.378%,
% i.e. [20342, 21334].
20838*(1-0.02378)
%model=fit(newX(:,1),y,"exp2");
function answer=fun1(no)
%FUN1 Evaluate the fitted regression model at the given contest numbers.
%   answer = fun1(no) returns, element-wise for the numeric array no,
%       c0 + c1*no - c2*no.^2 + c3*no.^3 - c4*no.^4 + c5*no.^5
%          + c6*log(no) - c7*log(no).^2
%   using the fixed coefficients below. The output has the same size as no.

    % Regression coefficients (magnitudes; signs are applied in the sum).
    c0 = -29844369576.6553;
    c1 = 54082223.7047499;
    c2 = 70804.0624355989;
    c3 = 81.2251478254803;
    c4 = 0.0581796757825891;
    c5 = 0.0000187254597040691;
    c6 = 14792650464.3214;
    c7 = 2033287277.42065;

    lnNo = log(no);

    % Terms are accumulated strictly left to right; the individual terms
    % are large and nearly cancel, so the summation order is kept fixed.
    answer = c0 + c1*no - c2*no.^2 + ...
        c3*no.^3 - c4*no.^4 + c5*no.^5 ...
        + c6*lnNo - c7*lnNo.*lnNo;
end
标签:...,num,log,no,gy,美赛,做题,stepwise,2023C
From: https://www.cnblogs.com/gongkai/p/17997386