策略结构化解析代码的 函数
def get_final_data(df ,project_name):
dfi = df.loc[df.project_name == project_name].copy()
dfi.reset_index(inplace = True ,drop = True)
data_frame = dfi.get_json(['input_data','temp_data','output_data'])
#data_frame['reason_code'] = data_frame['__reason_code']
data_frame['reason_code'] = data_frame['__reason_code'].apply(lambda x:str(x).replace("[",'').replace("]",'').replace("'","") if isinstance(x,(str,list)) else x)
#data_frame['reason_code'] = data_frame['reason_code'].apply(lambda x:str(x).split(',')[0].replace('"','') if isinstance(x,(str,list)) and str(x).find(',') > -1 else x)
data_frame.drop_col(['kh_identNo','kh_mobileNo','kh_name','__reason_code' ,'__fire_trace','__node_trace'] ,inplace=True)
data_frame = data_frame.merge(dfi[['session_id']] ,how = 'left' ,left_index= True ,right_index=True)
data_frame = data_frame.rename_col({'OUT_TEMPDECISION': 'OUT_TEMPDECISION'.lower()})
data_frame = data_frame.loc[pd.notnull(data_frame.out_tempdecision)].copy()
data_frame.reset_index(inplace = True ,drop = True)
g = dfi.groupby('session_id')
dfi_time = pd.DataFrame({'start_time':g.start_time.min() ,
'end_time':g.end_time.max() ,
'id':g.id.max()})
dfi_time.reset_index(inplace = True)
data_frame = data_frame.merge(dfi_time ,how = 'left' ,on = 'session_id')
dfi_version = dfi.drop_duplicates('session_id')
data_frame = data_frame.merge(dfi_version[['session_id' ,'version' ,'project_name']] ,how = 'left' ,on = 'session_id')
data_frame['apply_date'] = data_frame.start_time.apply(lambda x:pc.str2date(str(x)))
data_frame['start_time']=pd.to_datetime(data_frame['start_time'],format='%Y-%m-%d')
data_frame['end_time']=pd.to_datetime(data_frame['end_time'],format='%Y-%m-%d')
data_frame['cost_time'] = data_frame.apply(lambda x:(x['end_time'] - x['start_time']).seconds ,axis = 1)
cols_rep = data_frame.columns.to_list()
cols_x = [i for i in cols_rep if i.endswith('_x')]
cols_y = [i for i in cols_rep if i.endswith('_y')]
if len(cols_x):
for i in range(len(cols_x)):
data_frame[cols_x[i][:-2]] = data_frame.apply(lambda x: x[cols_x[i]] if pd.notnull(x[cols_x[i]]) else x[cols_y[i]] ,axis = 1)
data_frame.drop_col(cols_x + cols_y ,inplace=True)
cols_name = ['session_id' , 'customerName','idCard' , 'phoneNo','apply_date' ,'start_time',
'end_time','out_tempdecision','reason_code','id' ]
cols_col = data_frame.columns.to_list()
cols_name = [i for i in cols_name if i in cols_col]
for i in cols_name:
cols_col.remove(i)
cols_name.extend(cols_col)
data_frame[cols_name]
data_frame = data_frame[cols_name].copy()
if len(pc.re_search('dk_mx_loanRate_list',data_frame)):
data_frame['dk_mx_loanRate_list'] = data_frame['dk_mx_loanRate_list'].astype(str)
data_frame.drop_duplicates('session_id' ,keep = 'last' ,inplace = True)
data_frame.reset_index(inplace = True ,drop = True)
return data_frame
sqlStr='''select session_id ,start_time ,end_time ,input_data ,temp_data ,
output_data ,project_name ,version,id from 表 where dt = 'now' '''
conn = jaydebeapi.connect(dirver, url, [user, password], jarFile)
curs=conn.cursor()
curs.execute(sqlStr)
data_allresult= pd.read_sql_query(sqlStr,conn)
data_allresult['input_data'] = data_allresult['input_data'].apply(lambda x: str(x)[1:-1])
data_allresult['temp_data'] = data_allresult['temp_data'].apply(lambda x: str(x)[1:-1])
data_allresult['output_data'] = data_allresult['output_data'].apply(lambda x: str(x)[1:-1])
bkbsx= get_final_data(data_allresult ,'bangkebangSX')
bkbyx = get_final_data(data_allresult ,'bangkebangYX')