概要
应用场景:用户流失
本文介绍加载已训练好的模型并进行预测的步骤;这里使用的深度学习模型是自定义的 DeepFM。
代码
导包
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
from scipy import stats
from scipy import signal
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, f1_score
from scipy.spatial.distance import cosine
import lightgbm as lgb
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from tensorflow.keras.layers import *
import tensorflow.keras.backend as K
import tensorflow as tf
from tensorflow.keras.models import Model
import os,gc,re,warnings,sys,math
# Notebook housekeeping: silence library warnings and lift pandas' display
# limits so that long / wide frames are printed without truncation.
warnings.filterwarnings("ignore")
for _display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(_display_option, None)
读取数据
# Load the table to score from df_04m.csv (path is relative to the working directory).
data = pd.read_csv('df_04m.csv')
区分稀疏(类别)特征与稠密(数值)特征
# Categorical inputs; these are label-encoded before being fed to the model.
sparse_cols = ['shop_id', 'sex']
# Every remaining column that is not an id / label / excluded column is
# treated as a dense (numeric) input. The column order of `data` is kept.
_excluded_from_dense = set(sparse_cols) | {'customer_id', 'flag', 'duartion_is_lm'}
dense_cols = [col for col in data.columns if col not in _excluded_from_dense]
dense特征处理
def process_dense_feats(data, cols, scalers=None):
    """Fill missing values with 0 and standardize the given dense columns.

    Args:
        data: DataFrame containing the columns named in ``cols``. It is not
            modified; a processed copy is returned.
        cols: Names of the numeric columns to process.
        scalers: Optional mapping ``column name -> fitted scaler`` (any
            object exposing ``transform``, e.g. the ``StandardScaler``
            fitted on the training data). When given, each column is
            transformed with its stored scaler, so inference uses the same
            statistics as training. When omitted, a fresh ``StandardScaler``
            is fitted per column on ``data`` itself (the original behaviour).

    Returns:
        A copy of ``data`` in which every column in ``cols`` has been
        NaN-filled and scaled.

    Raises:
        KeyError: If ``scalers`` is given but lacks an entry for a column.
    """
    d = data.copy()
    for f in cols:
        # Select as a one-column DataFrame: the scaler API works on 2-D input.
        filled = d[[f]].fillna(0)
        if scalers is None:
            scaled = StandardScaler().fit_transform(filled)
        else:
            scaled = scalers[f].transform(filled)
        # Flatten the single-column result so it is stored as a plain column.
        d[f] = np.ravel(scaled)
    return d
# Replace the dense columns of `data` with their NaN-filled, standardized values.
# NOTE(review): the scaling statistics are fitted on this inference file rather
# than taken from training — confirm this matches how the saved model was trained.
data = process_dense_feats(data, dense_cols)
sparse稀疏特征处理
def process_sparse_feats(data, cols, encoders=None):
    """Fill missing values with '-1' and integer-encode the given sparse columns.

    Args:
        data: DataFrame containing the columns named in ``cols``. It is not
            modified; a processed copy is returned.
        cols: Names of the categorical columns to process.
        encoders: Optional mapping ``column name -> fitted encoder`` (any
            object exposing ``transform``, e.g. the ``LabelEncoder`` fitted
            on the training data). When given, each column is encoded with
            its stored encoder, so a category keeps the integer id it had
            during training. When omitted, a fresh ``LabelEncoder`` is
            fitted per column on ``data`` itself (the original behaviour).

    Returns:
        A copy of ``data`` in which every column in ``cols`` holds integer
        codes.

    Raises:
        KeyError: If ``encoders`` is given but lacks an entry for a column.
    """
    d = data.copy()
    for f in cols:
        # Missing values become the literal category '-1'; everything is
        # compared as a string so mixed-type columns encode consistently.
        values = d[f].fillna('-1').astype(str)
        if encoders is None:
            d[f] = LabelEncoder().fit_transform(values)
        else:
            d[f] = encoders[f].transform(values)
    return d
# Replace the sparse columns of `data` with integer category codes.
# NOTE(review): the label encoding is fitted on this inference file, so the
# codes only match training if the same set of categories is present — confirm.
data = process_sparse_feats(data, sparse_cols)
分离标签与特征(预测阶段不再切分训练集和测试集)
# Separate the `flag` column (presumably the churn label — confirm) from the
# feature table; the id column and 'duartion_is_lm' are dropped as well.
_non_feature_cols = ['customer_id', 'flag', 'duartion_is_lm']
y = data['flag']
X = data.drop(columns=_non_feature_cols)
数据处理成模型样式
def _columns_as_arrays(frame, names):
    """Return the ``.values`` of each named column, in the given order."""
    return [frame[name].values for name in names]


# One array per feature column: sparse columns first, then dense columns.
X_sparse_x = _columns_as_arrays(X, sparse_cols)
X_dense_x = _columns_as_arrays(X, dense_cols)
# Label values wrapped in a single-element list.
y_label = [y.values]
X_sparse_x  # notebook cell output: inspect the encoded sparse inputs
模型读取
# Restore the trained network from the deepfm_model.h5 file.
# NOTE(review): the article describes the model as custom; if it contains custom
# layers or objects, load_model may need a `custom_objects=` argument — confirm.
loaded_model = tf.keras.models.load_model('deepfm_model.h5')
模型预测
# Score all rows in large batches. The model receives the sparse arrays
# followed by the dense arrays as one flat list of inputs.
_model_inputs = X_sparse_x + X_dense_x
_batch_size = 4096 * 4
deepfm_prob = loaded_model.predict(_model_inputs, batch_size=_batch_size, verbose=1)
deepfm_prob.shape  # notebook cell output
deepfm_prob  # notebook cell output
# Assemble the result table: the processed input rows plus the predicted
# probability and the thresholded 0/1 prediction.
# Work on a copy so the new columns are not silently added to `data`.
df_submit = data.copy()
# Flatten the predict() output so it is stored as one ordinary column
# (assumes the model emits a single probability per row — confirm).
df_submit['prob'] = np.ravel(deepfm_prob)
# Vectorized threshold at 0.5. This replaces the chained
# `df['y_pre'].loc[mask] = ...` assignments, which write through a temporary
# object and have no effect under pandas copy-on-write.
df_submit['y_pre'] = (df_submit['prob'] >= 0.5).astype(int)
# Renumber rows 0..n-1 without keeping the old index as an extra column.
df_submit = df_submit.reset_index(drop=True)
df_submit.head(3)  # notebook cell output
df_submit.shape  # notebook cell output
# Confusion counts: number of rows per (actual flag, predicted label) pair.
df_submit.groupby(['flag', 'y_pre'])['customer_id'].count()
根据上述结果计算并打印精确率(precision)和召回率(recall)
# Precision and recall of the thresholded predictions, treating the value 1
# as the positive class in both `flag` and `y_pre`
# (assumes flag == 1 marks a churned customer — TODO confirm).
_actual_pos = df_submit['flag'] == 1
_pred_pos = df_submit['y_pre'] == 1
_true_pos = int((_actual_pos & _pred_pos).sum())
# Guard the denominators: with no predicted (or no actual) positives the
# metric is reported as 0.0 instead of dividing by zero.
precision = _true_pos / int(_pred_pos.sum()) if _pred_pos.any() else 0.0
recall = _true_pos / int(_actual_pos.sum()) if _actual_pos.any() else 0.0
print(f"precision: {precision:.4f}")
print(f"recall: {recall:.4f}")
参考资料:自己琢磨将资料整合
标签:调用,df,模型,cols,submit,sparse,深度,import,data From: https://blog.csdn.net/weixin_42504788/article/details/141753418