数值数据分类后交叉,但是数据量少,或者划分标准不科学
导致分类的类别有缺失,交叉后会丧失类别,数据不齐整
import numpy as np
import pandas as pd
df = pd.DataFrame(np.random.rand(100,2))
bins = np.arange(0,1.3,0.1)
bins_label =[str(i)[:3]+'_~' for i in bins[:-1]]
df[3] = pd.cut(df[0],bins=bins,labels=bins_label)
df[4] = pd.cut(df[1],bins=bins,labels=bins_label)
data = pd.crosstab(index=df[3],columns=df[4]) # 填充前
def df填充框架(df: pd.DataFrame, index: list, columns: list):
df2 = pd.DataFrame(index = index,columns = columns)
df2.update(df)
df2 = df2.fillna(0)
return df2
data = df填充框架(data,index=bins_label,columns=bins_label) # 填充后