KNN:基于两位用户共同评分过的电影,通过欧氏距离计算用户1和用户2之间的相似度得分
import json
import numpy as np
# 计算欧式距离分数
def euclidean_score(dataset, user1, user2):
    """Return a Euclidean-distance-based similarity score for two users.

    The score is ``1 / (1 + d)``, where ``d`` is the Euclidean distance
    between the two users' ratings over the items both of them have rated.
    Identical ratings on every shared item therefore give ``1.0``, and the
    score shrinks toward ``0`` as the ratings diverge.

    Args:
        dataset: Mapping of user -> mapping of item -> numeric rating.
        user1: Key of the first user in ``dataset``.
        user2: Key of the second user in ``dataset``.

    Returns:
        The similarity score, or the integer ``0`` when the two users have
        no rated item in common.

    Raises:
        TypeError: If ``user1`` or ``user2`` is not a key of ``dataset``.
    """
    # %-formatting (rather than '+') keeps the message buildable when the
    # user key is not a plain string.
    if user1 not in dataset:
        raise TypeError('User %s not present in the dataset' % (user1,))
    if user2 not in dataset:
        raise TypeError('User %s not present in the dataset' % (user2,))

    ratings1 = dataset[user1]
    ratings2 = dataset[user2]

    # Squared rating difference for every item rated by both users.
    squared_differences = [
        np.square(ratings1[item] - ratings2[item])
        for item in ratings1
        if item in ratings2
    ]

    # No shared items: there is nothing to compare, so the score is 0.
    if not squared_differences:
        return 0

    return 1 / (1 + np.sqrt(np.sum(squared_differences)))
# 计算得分
if __name__ == '__main__':
    # Ratings file, read relative to the current working directory.
    # NOTE(review): presumably shaped as {user: {movie: rating}}, which is
    # what euclidean_score indexes into -- confirm against the data file.
    data_file = 'movie_ratings.json'

    with open(data_file, 'r') as f:
        data = json.load(f)

    user1 = 'John Carson'
    user2 = 'Michelle Peterson'

    # Single-argument print(...) emits the same output on Python 2 and 3.
    print("\nEuclidean score:")
    print(euclidean_score(data, user1, user2))
标签:knn,__,user2,user1,用户,dataset,240721,item,np
From: https://blog.51cto.com/u_15862653/11888528