首页 > 其他分享 > 连续洗浴事件

连续洗浴事件

时间:2023-03-26 20:04:25  浏览次数:34
标签:loc plt 停顿 sj 洗浴 事件 水流量 data 连续

#10-1
import pandas as pd
import matplotlib.pyplot as plt

# Exploratory plots for the raw water-heater log: a bar chart of how many
# records have / do not have water flow, and a box plot of the flow readings.
inputfile = 'original_data.xls'
data = pd.read_excel(inputfile)

# Count each flow state once.  Series.value_counts replaces the deprecated
# top-level pd.value_counts, and .get(..., 0) keeps the script running when
# one of the two states does not occur in the data.
flow_state_counts = data['有无水流'].value_counts()
lv_non = flow_state_counts.get('无', 0)
lv_move = flow_state_counts.get('有', 0)

# Bar chart: number of records per flow state.
fig = plt.figure(figsize=(6, 5))
plt.rcParams['font.sans-serif'] = 'SimHei'  # font able to render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign renderable with that font
plt.bar([0, 1], height=[lv_non, lv_move], width=0.4, alpha=0.8, color='skyblue')
plt.xticks([0, 1], ['无', '有'])
plt.xlabel('水流状态')
plt.ylabel('记录数')
plt.title('3109不同水流状态记录数')
plt.show()
plt.close()

water = data['水流量']

# Box plot of the flow readings.  Missing readings are dropped because a
# single NaN makes matplotlib draw an empty box.  The tick label is set with
# plt.xticks instead of the `labels=` keyword, which is deprecated since
# matplotlib 3.9 (a lone box is drawn at x position 1).
fig = plt.figure(figsize=(5, 8))
plt.boxplot(water.dropna(),
            patch_artist=True,
            boxprops={'facecolor': 'lightblue'})
plt.xticks([1], ['水流量'])
plt.title('3109水流量分布箱线图')
plt.grid(axis='y')
plt.show()
plt.close()  # release the figure, matching the first plot

 

 

 

 

 

 

#10-5
# Build per-event duration and pause features on `sj` (one row per water-use
# event) from the record-level log in `data`.
data = pd.read_csv('water_heart.csv')
sj = pd.read_csv('sj.csv')

data["发生时间"] = pd.to_datetime(data["发生时间"], format="%Y%m%d%H%M%S")

# Every record is widened by half a second on each side.
timeDel = pd.Timedelta("0.5 sec")
# NOTE(review): the timestamp is read positionally from column 0 here;
# presumably that is "发生时间" -- confirm against the CSV layout.
sj["事件开始时间"] = data.iloc[sj["事件起始编号"] - 1, 0].values - timeDel
sj["事件结束时间"] = data.iloc[sj["事件终止编号"] - 1, 0].values + timeDel
sj['洗浴时间点'] = sj["事件开始时间"].dt.hour
# total_seconds() is independent of the datetime resolution; the previous
# np.int64(...)/1e9 conversion assumed nanoseconds and relied on a numpy
# import that this file never performs.
sj["总用水时长"] = (sj["事件结束时间"] - sj["事件开始时间"]).dt.total_seconds() + 1

# Mark pause boundaries (vectorised form of the former row-by-row loop):
#   * a pause starts on a zero-flow record whose predecessor had flow
#   * a pause ends on a zero-flow record whose successor has flow
# fill_value=0 makes the first/last record behave as if it had a zero-flow
# neighbour, so neither can be flagged by a neighbour that does not exist.
flow = data["水流量"]
is_zero = flow == 0
prev_has_flow = flow.shift(1, fill_value=0) != 0
next_has_flow = flow.shift(-1, fill_value=0) != 0
data["停顿开始时间"] = (data["发生时间"] - timeDel).where(is_zero & prev_has_flow)
data["停顿结束时间"] = (data["发生时间"] + timeDel).where(is_zero & next_has_flow)

# 1-based record numbers of the pause boundaries.
indStopStart = data.index[data["停顿开始时间"].notnull()] + 1
indStopEnd = data.index[data["停顿结束时间"].notnull()] + 1
# NOTE(review): pairing start[:-1] with end[1:] presumably relies on the log
# beginning and ending with a zero-flow stretch -- confirm for new data.
Stop = pd.DataFrame(data={"停顿开始编号": indStopStart[:-1],
                          "停顿结束编号": indStopEnd[1:]})

pause_begin = data.loc[indStopStart[:-1] - 1, "停顿开始时间"].to_numpy()
pause_finish = data.loc[indStopEnd[1:] - 1, "停顿结束时间"].to_numpy()
Stop["停顿时长"] = pd.Series(pause_finish - pause_begin,
                         index=Stop.index).dt.total_seconds()

# Attach each pause to the event that strictly contains it.  The column is
# created up front so it exists even when no pause falls inside any event.
Stop["停顿归属事件"] = float("nan")
for event_no, (first_no, last_no) in enumerate(
        zip(sj["事件起始编号"], sj["事件终止编号"]), start=1):
    inside_event = ((Stop["停顿开始编号"] > first_no) &
                    (Stop["停顿结束编号"] < last_no))
    Stop.loc[inside_event, "停顿归属事件"] = event_no

# Pauses lying between events are discarded.
Stop = Stop[Stop["停顿归属事件"].notnull()]

# Per-event pause totals.  String aggregator names avoid the deprecated
# builtin sum/len callables.
stopAgg = Stop.groupby("停顿归属事件").agg({"停顿时长": "sum", "停顿开始编号": "count"})
# The group keys are floats (the column started as NaN); convert them to the
# integer row labels of `sj` (event number - 1) before indexing.
event_rows = stopAgg.index.astype(int) - 1
sj["总停顿时长"] = 0.0
sj["停顿次数"] = 0.0
sj.loc[event_rows, "总停顿时长"] = stopAgg.loc[:, "停顿时长"].values
sj.loc[event_rows, "停顿次数"] = stopAgg.loc[:, "停顿开始编号"].values
sj.fillna(0, inplace=True)  # as before: any remaining NaN in sj becomes 0
stopNo0 = sj["停顿次数"] != 0
sj["平均停顿时长"] = 0.0  # events without pauses keep an average of 0
sj.loc[stopNo0, "平均停顿时长"] = sj.loc[stopNo0, "总停顿时长"] / sj.loc[stopNo0, "停顿次数"]
sj["用水时长"] = sj["总用水时长"] - sj["总停顿时长"]
sj["用水/总时长"] = sj["用水时长"] / sj["总用水时长"]
print('用水事件用水时长与频率特征构造完成后数据的特征为:\n',sj.columns)
print('用水事件用水时长与频率特征构造完成后数据的前5行5列特征为:\n',
      sj.iloc[:5,:5])

 

 

#10-6
# Build per-event water-volume and fluctuation features on `sj`.
# Relies on `data`, `sj` and `Stop` as prepared by the preceding section
# ("发生时间" already parsed to datetimes; pause columns present on `sj`).
data["水流量"] = data["水流量"] /60  # presumably per-minute -> per-second flow; confirm units

# Seconds credited to the last record of an event (the constant the original
# code multiplied the final reading by).
tail_gap_s = 2

flow = data["水流量"].to_numpy(dtype=float)
# Seconds from each record to the next one.  total_seconds() keeps whole days
# that `.seconds` silently drops.  The very last record has no successor
# (the old code raised KeyError there), so it is credited tail_gap_s instead.
gap_series = (data["发生时间"].shift(-1) - data["发生时间"]).dt.total_seconds()
gap_series.iloc[-1:] = tail_gap_s
gap = gap_series.to_numpy()

# 0-based positions of the first / last record of every event.
first_rows = sj["事件起始编号"].to_numpy(dtype=int) - 1
last_rows = sj["事件终止编号"].to_numpy(dtype=int) - 1

# Total volume: flow * time-to-next-record for every record before the last
# one, plus the last record's flow over tail_gap_s.  Zero-flow records add
# nothing, and a single-record event reduces to flow * tail_gap_s because the
# slice is empty.
total_volume = [
    (gap[first:last] * flow[first:last]).sum() + flow[last] * tail_gap_s
    for first, last in zip(first_rows, last_rows)
]
# Built as a float Series so the column never starts out as integers.
sj["总用水量"] = pd.Series(total_volume, index=sj.index, dtype=float)
sj["平均水流量"] = sj["总用水量"] / sj["用水时长"]

# Flow fluctuation: time-weighted squared deviation from the event's mean
# flow over the records that actually have flow, divided by the use time.
flow_spread = []
for first, last, mean_flow, use_time in zip(first_rows, last_rows,
                                            sj["平均水流量"].to_numpy(),
                                            sj["用水时长"].to_numpy()):
    seg_flow = flow[first:last + 1]
    seg_gap = gap[first:last + 1]
    has_flow = seg_flow != 0
    weighted = ((seg_flow[has_flow] - mean_flow) ** 2 * seg_gap[has_flow]).sum()
    flow_spread.append(weighted / use_time)
sj["水流量波动"] = pd.Series(flow_spread, index=sj.index, dtype=float)

# Pause-length fluctuation: only defined for events with more than one pause;
# every other event keeps 0.
pause_spread = pd.Series(0.0, index=sj.index)
for row in sj.index[sj["停顿次数"] > 1]:
    lengths = Stop.loc[Stop["停顿归属事件"] == (row + 1), "停顿时长"].to_numpy()
    weighted = ((lengths - sj.loc[row, "平均停顿时长"]) ** 2 * lengths).sum()
    pause_spread.loc[row] = weighted / sj.loc[row, "总停顿时长"]
sj["停顿时长波动"] = pause_spread
print('用水量和波动属性构造完成后数据的属性为:\n',sj.columns)
print('用水量和波动属性构造完成后数据的前5行5列属性为: \n',sj.iloc[:5,:5])

 

 

 

 

#10-9
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
import joblib
import matplotlib.pyplot as plt

# Evaluate the saved neural-network classifier on the test split and plot its
# ROC curve.  `x_stdtest` and `y_test` are not defined in this section; they
# must already exist when this code runs.

# NOTE: joblib.load unpickles arbitrary objects -- only load model files that
# come from a trusted source.
bpnn = joblib.load('water_heater_nnet.m')  # load the trained model
y_pred = bpnn.predict(x_stdtest)  # predicted labels for the test set
print('神经网络预测结果评价报告:\n',classification_report(y_test,y_pred))
# Draw the ROC curve
plt.rcParams['font.sans-serif'] = 'SimHei'  # font able to render Chinese text
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign renderable
# roc_curve expects (y_true, y_score); the arguments were previously swapped,
# which computed the curve with predictions treated as the ground truth.
# NOTE(review): hard labels give a single-threshold curve; if the model offers
# predict_proba, passing the positive-class probability would be more
# informative -- confirm the model type before changing.
fpr, tpr, thresholds = roc_curve(y_test, y_pred)  # FPR / TPR per threshold
plt.figure(figsize=(6,4))  # create the canvas
plt.plot(fpr,tpr)  # draw the curve
plt.title('3109用户用水事件识别ROC曲线')  # title
plt.xlabel('FPR')  # x-axis label
plt.ylabel('TPR')  # y-axis label
plt.savefig('./用户用水事件识别ROC曲线.png')  # save the figure
plt.show()  # display the figure

 

标签:loc,plt,停顿,sj,洗浴,事件,水流量,data,连续
From: https://www.cnblogs.com/cl3109/p/17259298.html

相关文章