3.1 Linear Regression
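The class below implements linear regression via the closed-form (normal equation) solution, with optional intercept fitting and feature standardization. For reference (a standard least-squares result, not spelled out in the original post), the closed-form solution minimizes the residual sum of squares:

$$J(\theta) = \lVert X\theta - y \rVert_2^2, \qquad \hat{\theta} = (X^{\top}X)^{-1}X^{\top}y = X^{+}y,$$

where $X^{+}$ is the Moore-Penrose pseudoinverse computed by np.linalg.pinv, which stays well defined even when $X^{\top}X$ is singular.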
import numpy as np
import matplotlib.pyplot as plt


class LinearRegressionClosedFormSol:
    """
    Linear regression solved with the closed-form (normal equation) solution.
    1. Data preprocessing: optionally fit an intercept term fit_intercept (default True)
       and standardize the features normalized (default True)
    2. Model training via the closed-form solution: fit(self, x_train, y_train)
    3. Model prediction: predict(self, x_test)
    4. Mean squared error, coefficient of determination and adjusted coefficient of determination
    5. Visualization of the model predictions
    """

    def __init__(self, fit_intercept=True, normalized=True):
        self.fit_intercept = fit_intercept  # whether to fit an intercept term
        self.normalized = normalized  # whether to standardize the samples
        self.theta = None  # model coefficients
        if self.normalized:
            # If standardizing, store the feature means and standard deviations so that
            # test samples can be standardized and the coefficients recovered on the original scale
            self.feature_mean, self.feature_std = None, None
        self.mse = None  # mean squared error of the model predictions
        self.r2, self.r2_adj = 0.0, 0.0  # coefficient of determination and adjusted R^2
        self.n_samples, self.n_features = 0, 0  # number of samples and number of features
    def fit(self, x_train, y_train):
        """
        Preprocess the samples and solve the model coefficients with the closed-form formula.
        :param x_train: training samples, ndarray, m*k
        :param y_train: target values, ndarray, m*1
        :return:
        """
        if self.normalized:
            self.feature_mean = np.mean(x_train, axis=0)  # feature means; axis=0 collapses the rows, i.e. per-column means, shape 1*k
            self.feature_std = np.std(x_train, axis=0) + 1e-8  # feature standard deviations; 1e-8 avoids a zero denominator
            x_train = (x_train - self.feature_mean) / self.feature_std  # standardization
        if self.fit_intercept:
            x_train = np.c_[x_train, np.ones_like(y_train)]  # append a column of ones; np.c_ concatenates arrays column-wise
        # train the model
        self._fit_closed_form_solution(x_train, y_train)
    def _fit_closed_form_solution(self, x_train, y_train):
        """
        Solve the model coefficients with the closed-form formula.
        :param x_train: preprocessed training samples, ndarray, m*k
        :param y_train: target values, ndarray, m*1
        :return:
        """
        # pinv: Moore-Penrose pseudoinverse, equal to (X'*X)^(-1)*X' when X has full column rank
        self.theta = np.linalg.pinv(x_train).dot(y_train)
        # Alternative with an explicit (regularized) inverse:
        # xtx = np.dot(x_train.T, x_train) + 0.01 * np.eye(x_train.shape[1])  # guard against a singular matrix
        # self.theta = np.linalg.inv(xtx).dot(x_train.T).dot(y_train)
    def get_params(self):
        """
        Get the model coefficients (weights and bias) on the original feature scale.
        :return:
        """
        if self.fit_intercept:
            weight, bias = self.theta[:-1], self.theta[-1]
        else:
            weight, bias = self.theta, np.array([0])
        if self.normalized:
            # Undo the standardization: y = w_std * (x - mean) / std + b  =>  w = w_std / std, b = b - w.dot(mean)
            weight = weight / self.feature_std  # recover the weights on the original scale
            bias = bias - weight.T.dot(self.feature_mean)
        return weight, bias
    def predict(self, x_test):
        """
        Model prediction.
        :param x_test: test samples
        :return:
        """
        try:
            self.n_samples, self.n_features = x_test.shape[0], x_test.shape[1]
        except IndexError:
            self.n_samples, self.n_features = x_test.shape[0], 1
        if self.normalized:
            x_test = (x_test - self.feature_mean) / self.feature_std
        if self.fit_intercept:
            x_test = np.c_[x_test, np.ones(shape=x_test.shape[0])]  # shape[0] is the number of rows
        return x_test.dot(self.theta)
    def cal_mse_r2(self, y_test, y_pred):
        """
        Mean squared error MSE, coefficient of determination and adjusted R^2 of the predictions.
        :param y_test: true values of the test samples
        :param y_pred: predicted values of the test samples
        :return:
        """
        self.mse = ((y_pred - y_test) ** 2).mean()  # mean squared error
        self.r2 = 1 - ((y_test - y_pred) ** 2).sum() / ((y_test - y_test.mean()) ** 2).sum()  # 1 - SSE / SST
        self.r2_adj = 1 - (1 - self.r2) * (self.n_samples - 1) / (self.n_samples - self.n_features - 1)
        return self.mse, self.r2, self.r2_adj
    def plt_predict(self, y_test, y_pred, is_sort=True):
        """
        Visualization of the prediction results.
        :param y_test: true values of the test samples
        :param y_pred: predicted values of the test samples
        :param is_sort: whether to sort by the true values before plotting
        :return:
        """
        plt.figure(figsize=(7, 5))
        if is_sort:
            idx = np.argsort(y_test)
            plt.plot(y_test[idx], "k--", lw=1.5, label="Test True Val")
            plt.plot(y_pred[idx], "r:", lw=1.8, label="Predict Val")
        else:
            plt.plot(y_test, "ko-", lw=1.5, label="Test True Val")
            plt.plot(y_pred, "r*-", lw=1.8, label="Predict Val")
        plt.xlabel("Test samples number", fontdict={"fontsize": 12})
        plt.ylabel("Predicted samples values", fontdict={"fontsize": 12})
        plt.title("The predicted values of test samples\n"
                  "MSE = %.5f, R2 = %.5f, R2_adj = %.5f" % (self.mse, self.r2, self.r2_adj))
        plt.grid(ls=":")
        plt.legend(frameon=False)
        plt.show()
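A minimal usage sketch follows (not part of the original post); the synthetic data, sample sizes, true coefficients, and noise level are illustrative assumptions chosen only to exercise the class above.

if __name__ == "__main__":
    np.random.seed(0)
    # Synthetic 1-D regression problem: y = 3*x + 2 + Gaussian noise (illustrative assumption)
    x = np.random.uniform(-5, 5, size=(200, 1))
    y = 3 * x[:, 0] + 2 + np.random.normal(0, 1, size=200)
    x_train, x_test = x[:150], x[150:]
    y_train, y_test = y[:150], y[150:]

    model = LinearRegressionClosedFormSol(fit_intercept=True, normalized=True)
    model.fit(x_train, y_train)
    weight, bias = model.get_params()  # coefficients recovered on the original feature scale
    print("weight =", weight, ", bias =", bias)

    y_pred = model.predict(x_test)
    mse, r2, r2_adj = model.cal_mse_r2(y_test, y_pred)
    print("MSE = %.5f, R2 = %.5f, R2_adj = %.5f" % (mse, r2, r2_adj))
    model.plt_predict(y_test, y_pred, is_sort=True)

Note that cal_mse_r2 relies on n_samples and n_features recorded inside predict, so it should be called after predict, as above.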
From: https://blog.csdn.net/weixin_67870062/article/details/137211853