背景:计算两个多维数组的交并比(Jaccard 指数),即把每一行视为一个集合元素,用交集行数除以并集行数
# Two 3x3 integer matrices holding 0..8 and 4..12 respectively.
a = np.arange(9).reshape(-1, 3)
b = np.arange(4, 13).reshape(-1, 3)
print(a, b, sep='\n')

# Four random rows that are appended to BOTH matrices below, so the two
# stacked arrays share exactly these rows.
c = np.random.rand(4, 3)
print(c, '\n')

# Stack the shared random rows underneath each integer matrix.
A = np.vstack((a, c))
B = np.vstack((b, c))
print(A, B, sep='\n')
def jaccard_index_arr(A, B):
    """Return the Jaccard index of two row collections.

    Each row of ``A`` and ``B`` is converted to a tuple and treated as one
    set element, so duplicate rows inside an input count once and rows are
    matched by exact equality of all their values.

    Args:
        A: Iterable of rows (e.g. a 2-D NumPy array).
        B: Iterable of rows (e.g. a 2-D NumPy array).

    Returns:
        ``len(intersection) / len(union)`` as a float, or ``0.0`` when both
        inputs contain no rows (the union is empty).

    Side effects:
        Prints the intersection size, the union size and the index.
    """
    # Tuples are hashable, which lets whole rows be stored in a set.
    rows_a = {tuple(row) for row in A}
    rows_b = {tuple(row) for row in B}

    n_inter = len(rows_a & rows_b)
    n_union = len(rows_a | rows_b)

    # An empty union would otherwise raise ZeroDivisionError.
    index = n_inter / n_union if n_union else 0.0
    print(n_inter, n_union, index)
    return index
# Demo: score the two stacked arrays A and B; the call also prints the
# intersection size, the union size and the resulting index.
jaccard_index_arr(A, B)
import pandas as pd
from auto_funcs2 import jaccard_array_similarity
# Load the input object from a local pickle file. Presumably a DataFrame with
# columns 'pse_lab', 'usp_lab', 'red1' and 'red2' (the columns that
# get_indicator_jaccard reads) -- TODO confirm.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# come from a trusted source.
df_pre = pd.read_pickle("jaccard.pkl")
# Number of category labels to evaluate; labels 0 .. num_cat - 1 are scored.
num_cat = 10
def get_indicator_jaccard(df_pre, num_cat):
    """Return the mean per-category Jaccard similarity between two labelings.

    For every category ``c`` in ``range(num_cat)`` the rows whose
    ``'pse_lab'`` equals ``c`` and the rows whose ``'usp_lab'`` equals ``c``
    are selected, reduced to the ``'red1'``/``'red2'`` columns and passed to
    ``jaccard_array_similarity``. The per-category scores are averaged.

    Args:
        df_pre: DataFrame with the columns ``'pse_lab'``, ``'usp_lab'``,
            ``'red1'`` and ``'red2'``.
        num_cat: Number of category labels; labels ``0 .. num_cat - 1`` are
            evaluated.

    Returns:
        The mean of the per-category scores. A category whose score cannot
        be computed contributes ``0``.

    Side effects:
        Prints each category score, a message for every failed category,
        and the final mean.
    """
    import numpy as np

    feature_cols = ['red1', 'red2']
    jas_list = []
    for c in range(num_cat):
        try:
            A = df_pre.loc[df_pre['pse_lab'] == c, feature_cols].to_numpy()
            B = df_pre.loc[df_pre['usp_lab'] == c, feature_cols].to_numpy()
            jas = jaccard_array_similarity(A, B)
        except Exception as exc:
            # Best effort: a failing category is scored 0, but the reason is
            # reported instead of being swallowed. Catching Exception (not a
            # bare except) lets KeyboardInterrupt and SystemExit propagate.
            print("category", c, "failed:", repr(exc), "- scored as 0")
            jas_list.append(0)
        else:
            jas_list.append(jas)
            print(jas)

    jas_mean = np.mean(jas_list)
    print(jas_mean)
    return jas_mean
# Compute the mean per-category Jaccard score for the loaded data.
# NOTE(review): the name `std_dif` suggests a standard deviation, but the
# value stored here is the mean returned by get_indicator_jaccard -- consider
# renaming.
std_dif = get_indicator_jaccard(df_pre, num_cat)
- 参考文献
- [1] https://stackoverflow.com/questions/8317022/get-intersecting-rows-across-two-2d-numpy-arrays
- [2] https://scikit-learn.org/stable/modules/generated/sklearn.metrics.jaccard_score.html
- [3] https://leetcode.com/problems/intersection-of-two-arrays/
- [4] https://scikit-learn.org/0.15/modules/generated/sklearn.metrics.jaccard_similarity_score.html
- [5] https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.jaccard.html
- [6] https://www.statology.org/jaccard-similarity-python/
- [7] https://zhuanlan.zhihu.com/p/358736351