1.读取数据
import pandas as pd
import openpyxl
import numpy as np
# Load the workbook with header=None (no row is consumed as column names),
# drop the first two rows, and keep only the columns labelled 1 through 8.
data = (
    pd.read_excel(
        "D:我国大陆经济发展状况数据.xlsx",
        header=None,
        engine="openpyxl",
    )
    .iloc[2:]
    .loc[:, list(range(1, 9))]
)
# Bare expression: shows the frame when run as a notebook cell.
data
2.中心化(去均值)
# Mean-center every feature (column) so the covariance/PCA steps operate on
# deviations from each column's own mean.
sample, feature = data.shape
# Cast once to float so the arithmetic below is numeric rather than
# object-dtype; non-numeric cells fail loudly here instead of later.
data = data.astype("float64")
# Use an explicit column-wise mean. np.mean(data) forwards axis=None to
# DataFrame.mean, which on recent pandas versions reduces over the whole frame
# to a single scalar, so every column would be shifted by the same grand mean.
data = data - data.mean(axis=0)
# Bare expression: shows the centered frame when run as a notebook cell.
data
3.计算协方差矩阵
# Build the numeric matrix of centered samples and its covariance matrix.
# - np.mat was removed in NumPy 2.0; np.asmatrix is the surviving spelling and
#   keeps data1 a np.matrix, so later `data1 * M` stays a matrix product.
# - float64 replaces float16: half precision overflows to inf above 65504 and
#   keeps only ~3 significant digits, which corrupts the covariance.
data1 = np.asmatrix(np.asarray(data, dtype=np.float64))
# np.cov treats rows as variables, hence the transpose of the
# samples-by-features matrix.
covX = np.cov(data1.T)
# Bare expression: shows the covariance matrix when run as a notebook cell.
covX
5.求特征值和特征向量
# Eigen-decomposition of the covariance matrix.
# A covariance matrix is symmetric, so use eigh: it guarantees real
# eigenvalues and orthonormal eigenvectors, whereas the general-purpose eig
# can return complex values caused purely by round-off.
# Column i of eig_vec is the eigenvector belonging to eig_val[i].
eig_val, eig_vec = np.linalg.eigh(covX)
# Pair each eigenvalue magnitude with its eigenvector (rows of eig_vec.T are
# the columns of eig_vec); sized from the result itself rather than from an
# externally defined feature count.
eig_pairs = [(np.abs(val), vec) for val, vec in zip(eig_val, eig_vec.T)]
# Bare expression: shows the eigenvalues when run as a notebook cell.
eig_val
6.排序
# Indices of the eigenvalues ordered from largest to smallest: sorting the
# negated values ascending yields a descending order of the originals.
index = np.negative(eig_val).argsort()
# Bare expression: shows the ascending-order indices for comparison when run
# as a notebook cell.
eig_val.argsort()
7.降维
# Number of principal components to keep.
k = 3
# Pick the eigenvectors for the k leading entries of `index`; each row of
# selectVec is one selected eigenvector.
selectVec = np.matrix(eig_vec[:, index[:k]].T)
# Project the samples onto the selected components. Per the original author's
# note the shapes are (30, 8) x (8, 3) -> (30, 3).
finalData = np.dot(data1, selectVec.T)
# Bare expressions: show the shape and the projected data in a notebook.
finalData.shape
finalData
标签:val,data1,练习,降维,vec,eig,np,PCA,data
From: https://www.cnblogs.com/1759945451qq/p/16855585.html