"""
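Function description: min-max data normalization
Parameters:
dataMatrix - data matrix
Returns:
matNormalized - normalized data matrix
ranges - max - min of each data dimension
mins - 1*14 matrix holding the minimum of each data dimension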
"""
def dataNormalization(dataMatrix):
    """Rescale every column of ``dataMatrix`` with min-max normalization.

    Parameters:
        dataMatrix - 2-D NumPy array (or matrix); the minimum and maximum are
                     taken along axis 0, i.e. separately for each column.
    Returns:
        matNormalized - (dataMatrix - mins) / ranges, same shape as the input.
                        A column whose values are all identical maps to 0.
        ranges - per-column max - min (a constant column reports 0).
        mins - per-column minimum.
    """
    mins = dataMatrix.min(0)
    maxs = dataMatrix.max(0)
    ranges = maxs - mins
    # A constant column has a range of 0; dividing by it would fill that
    # column with NaN. Its numerator is already 0, so dividing by 1 instead
    # yields a clean 0 while the returned ``ranges`` stays untouched.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting stretches the per-column vectors over every row, so no
    # explicit np.tile copies are required.
    matNormalized = (dataMatrix - mins) / safeRanges
    return matNormalized, ranges, mins
"""
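Function description: classify a single data unit
Parameters:
testUnit - 1*14 matrix, a single test sample
traingSet - training set
labels - list of labels
k - KNN parameter (number of nearest neighbours that vote)
Returns:
sortedClassCountDict[0][0] - predicted class label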
"""
def unitclassify(testUnit, traingSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Parameters:
        testUnit - single sample, either a 1-D vector or a 1 x d row.
        traingSet - 2-D array of training samples, one sample per row.
        labels - sequence of labels, indexed by training-row position.
        k - number of nearest neighbours that vote; values larger than the
            number of training rows are clamped to that number.
    Returns:
        The label with the most votes among the k nearest rows (Euclidean
        distance). On a tie, the label whose first vote came from the
        closest neighbour wins.
    Raises:
        ValueError - if traingSet has no rows or k is smaller than 1.
    """
    # np.asarray makes ``**`` element-wise even when an np.matrix is passed
    # (on np.matrix, ``**`` means matrix power).
    trainArray = np.asarray(traingSet)
    traingSetSize = trainArray.shape[0]
    if traingSetSize == 0:
        raise ValueError("traingSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Never ask for more neighbours than there are training rows.
    k = min(k, traingSetSize)

    # Broadcasting subtracts the test sample from every training row; the
    # sign of the difference is irrelevant because it is squared next.
    diffMatrix = trainArray - np.asarray(testUnit)
    distances = (diffMatrix ** 2).sum(axis=1) ** 0.5
    nearestIndices = distances.argsort()[:k]

    classCountDict = {}
    for index in nearestIndices:
        label = labels[index]
        classCountDict[label] = classCountDict.get(label, 0) + 1
    # max() returns the first maximal entry in insertion order, which is the
    # same winner a stable descending sort by count would put first.
    return max(classCountDict.items(), key=lambda unit: unit[1])[0]
# Tags: matNormalized, dataMatrix, homework, traingSet, ranges, np, mins
# From: https://blog.51cto.com/u_15830263/6408166