# 10-1: exploratory plots of the raw records -- the number of records per
# water-flow state (bar chart) and the distribution of the flow rate (box plot).
import pandas as pd
import matplotlib.pyplot as plt

inputfile = 'original_data.xls'
data = pd.read_excel(inputfile)

# Count the flow states once, through the Series method (the top-level
# pd.value_counts helper is deprecated). .get(..., 0) keeps the chart working
# when one of the two states never occurs in the file instead of raising
# KeyError.
flowStateCounts = data['有无水流'].value_counts()
lv_non = flowStateCounts.get('无', 0)
lv_move = flowStateCounts.get('有', 0)

fig = plt.figure(figsize=(6, 5))
plt.rcParams['font.sans-serif'] = 'SimHei'  # font used for the Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign drawable with that font
barPositions = [0, 1]
plt.bar(barPositions, height=[lv_non, lv_move], width=0.4, alpha=0.8,
        color='skyblue')
plt.xticks(barPositions, ['无', '有'])
plt.xlabel('水流状态')
plt.ylabel('记录数')
plt.title('3109不同水流状态记录数')
plt.show()
plt.close()

water = data['水流量']
fig = plt.figure(figsize=(5, 8))
# Missing readings are dropped for the plot only: NaN values would otherwise
# propagate into the quartile computation and leave the box empty.
plt.boxplot(water.dropna(), patch_artist=True, labels=['水流量'],
            boxprops={'facecolor': 'lightblue'})
plt.title('3109水流量分布箱线图')
plt.grid(axis='y')
plt.show()
# 10-5: build duration and pause-frequency features for every water-use event.
#
# Inputs : water_heart.csv (one row per record) and sj.csv (one row per event,
#          giving the 1-based numbers of its first and last record).
# Outputs: module-level `data` (with pause boundary columns), `Stop` (one row
#          per pause that lies inside an event) and `sj` (with the new
#          feature columns).
# Both tables are addressed by row position, i.e. record number N is row N-1.
import numpy as np
import pandas as pd

data = pd.read_csv('water_heart.csv')
sj = pd.read_csv('sj.csv')
data["发生时间"] = pd.to_datetime(data["发生时间"], format="%Y%m%d%H%M%S")

# Every event / pause boundary is widened by half a second on each side.
timeDel = pd.Timedelta("0.5 sec")

# The timestamp column is addressed by name rather than by "column 0".
recordTime = data["发生时间"]
sj["事件开始时间"] = recordTime.iloc[(sj["事件起始编号"] - 1).to_numpy()].values - timeDel
sj["事件结束时间"] = recordTime.iloc[(sj["事件终止编号"] - 1).to_numpy()].values + timeDel
sj['洗浴时间点'] = sj["事件开始时间"].dt.hour
# total_seconds() replaces the np.int64(...) / 1e9 nanosecond cast.
sj["总用水时长"] = (sj["事件结束时间"] - sj["事件开始时间"]).dt.total_seconds() + 1

# A pause starts on the first zero-flow record that follows a non-zero one and
# ends on the last zero-flow record that precedes a non-zero one. Comparing
# the flow array with itself shifted by one row replaces the row-by-row loop
# and guarantees that both columns exist even when no transition is found.
flow = data["水流量"].to_numpy()
isStopStart = np.zeros(len(data), dtype=bool)
isStopEnd = np.zeros(len(data), dtype=bool)
isStopStart[1:] = (flow[:-1] != 0) & (flow[1:] == 0)
isStopEnd[:-1] = (flow[:-1] == 0) & (flow[1:] != 0)
data["停顿开始时间"] = (recordTime - timeDel).where(isStopStart)
data["停顿结束时间"] = (recordTime + timeDel).where(isStopEnd)

# 1-based record numbers of every detected pause boundary.
indStopStart = data.index[data["停顿开始时间"].notnull()] + 1
indStopEnd = data.index[data["停顿结束时间"].notnull()] + 1

# Pair every pause start with the pause end that follows it. Starts and ends
# alternate, so only two unmatched boundaries can exist: an end before the
# first start (the log begins with zero flow) and a start after the last end
# (the log finishes with zero flow). Dropping exactly those gives the same
# pairs as the former fixed [:-1] / [1:] slicing when both are present, and
# stays correct (instead of raising or losing a pause) when one is absent.
pairedStart = indStopStart.to_numpy()
pairedEnd = indStopEnd.to_numpy()
if pairedEnd.size and (not pairedStart.size or pairedEnd[0] < pairedStart[0]):
    pairedEnd = pairedEnd[1:]
pairCount = min(pairedStart.size, pairedEnd.size)
pairedStart = pairedStart[:pairCount]
pairedEnd = pairedEnd[:pairCount]

Stop = pd.DataFrame(data={"停顿开始编号": pairedStart,
                          "停顿结束编号": pairedEnd})
Stop["停顿时长"] = (
    data.loc[pairedEnd - 1, "停顿结束时间"].values
    - data.loc[pairedStart - 1, "停顿开始时间"].values
) / np.timedelta64(1, "s")

# Attach each pause to the event whose record range strictly contains it;
# events are numbered from 1 in row order. The column is created up front so
# it exists even when there are no events or no pauses.
Stop["停顿归属事件"] = np.nan
for eventNo, (firstRecord, lastRecord) in enumerate(
        zip(sj["事件起始编号"], sj["事件终止编号"]), start=1):
    inside = (Stop["停顿开始编号"] > firstRecord) & (Stop["停顿结束编号"] < lastRecord)
    Stop.loc[inside, "停顿归属事件"] = eventNo
Stop = Stop[Stop["停顿归属事件"].notnull()]

# String aggregator names instead of the builtins sum / len, which pandas
# deprecates as groupby aggregators.
stopAgg = Stop.groupby("停顿归属事件").agg({"停顿时长": "sum", "停顿开始编号": "count"})

# Events without any pause keep 0 for both totals.
pausedRows = stopAgg.index.to_numpy().astype(int) - 1
totalStopSeconds = np.zeros(len(sj))
stopCounts = np.zeros(len(sj))
totalStopSeconds[pausedRows] = stopAgg["停顿时长"].to_numpy()
stopCounts[pausedRows] = stopAgg["停顿开始编号"].to_numpy()
sj["总停顿时长"] = totalStopSeconds
sj["停顿次数"] = stopCounts

stopNo0 = sj["停顿次数"] != 0
sj["平均停顿时长"] = (sj["总停顿时长"] / sj["停顿次数"]).where(stopNo0, 0.0)
# Kept from the original flow: any other missing value in sj is zero-filled.
sj.fillna(0, inplace=True)

sj["用水时长"] = sj["总用水时长"] - sj["总停顿时长"]
sj["用水/总时长"] = sj["用水时长"] / sj["总用水时长"]
print('用水事件用水时长与频率特征构造完成后数据的特征为:\n', sj.columns)
print('用水事件用水时长与频率特征构造完成后数据的前5行5列特征为:\n',
      sj.iloc[:5, :5])
# 10-6: water volume and fluctuation features for every water-use event.
#
# Works on the module-level `data`, `sj` and `Stop` tables built by the 10-5
# step and adds four columns to `sj`: total volume, mean flow, flow
# fluctuation and pause-duration fluctuation. Record numbers in `sj` are
# 1-based, so record N is row N-1 of `data`.

# Rescales the flow column in place (running this section twice divides
# twice). Presumably converts a per-minute rate to a per-second rate --
# TODO confirm the unit of the raw column.
data["水流量"] = data["水流量"] / 60

# Seconds credited to a record that has no usable successor; the volume
# formula below credits the last record of every event with this duration.
LAST_RECORD_SECONDS = 2

flowRate = data["水流量"].to_numpy(dtype=float)
recordTime = data["发生时间"]
# Seconds from each record to the next one. total_seconds() is used instead
# of Timedelta.seconds, which silently discards the day component of a gap.
gapSeconds = (recordTime.shift(-1) - recordTime).dt.total_seconds().to_numpy(copy=True)
if gapSeconds.size:
    # The final record of the table has no successor. The former
    # data.loc[j + 1] lookup failed for an event ending on that record; it is
    # now credited with the same fixed duration the volume formula uses.
    gapSeconds[-1] = LAST_RECORD_SECONDS

firstRow = sj["事件起始编号"].to_numpy().astype(int) - 1
lastRow = sj["事件终止编号"].to_numpy().astype(int) - 1

# Volume = sum(flow * time to next record) over all records but the last of
# the event, plus the last record's flow held for LAST_RECORD_SECONDS.
# Zero-flow records contribute nothing, and a single-record event has an
# empty slice, so no special cases are needed. Built as floats from the start
# rather than assigning floats into an integer-initialised column.
sj["总用水量"] = [
    float((flowRate[a:b] * gapSeconds[a:b]).sum()
          + flowRate[b] * LAST_RECORD_SECONDS)
    for a, b in zip(firstRow, lastRow)
]
sj["平均水流量"] = sj["总用水量"] / sj["用水时长"]

# Flow fluctuation: time-weighted squared deviation from the event's mean
# flow over its non-zero-flow records, divided by the event's net duration.
# NOTE(review): unlike the volume formula, the last record of an event is
# weighted by the real gap to the following record, which may belong to idle
# time after the event -- behaviour kept from the original, confirm intent.
flowFluctuation = []
for a, b, meanFlow, useSeconds in zip(firstRow, lastRow,
                                      sj["平均水流量"], sj["用水时长"]):
    eventFlow = flowRate[a:b + 1]
    eventGap = gapSeconds[a:b + 1]
    flowing = eventFlow != 0
    weighted = ((eventFlow - meanFlow) ** 2) * eventGap
    flowFluctuation.append(float(weighted[flowing].sum() / useSeconds))
sj["水流量波动"] = flowFluctuation

# Pause-duration fluctuation: duration-weighted squared deviation from the
# event's mean pause length, divided by its total pause time. Events with at
# most one pause keep 0.
stopFluctuation = []
for eventNo, (stopCount, meanStop, totalStop) in enumerate(
        zip(sj["停顿次数"], sj["平均停顿时长"], sj["总停顿时长"]), start=1):
    if stopCount > 1:
        durations = Stop.loc[Stop["停顿归属事件"] == eventNo, "停顿时长"].to_numpy()
        stopFluctuation.append(
            float((((durations - meanStop) ** 2) * durations).sum() / totalStop))
    else:
        stopFluctuation.append(0.0)
sj["停顿时长波动"] = stopFluctuation

print('用水量和波动属性构造完成后数据的属性为:\n', sj.columns)
print('用水量和波动属性构造完成后数据的前5行5列属性为: \n', sj.iloc[:5, :5])
# 10-9: evaluate the saved neural-network model on the test set and plot its
# ROC curve. `x_stdtest` and `y_test` are expected to exist already; they are
# defined by a step that is not part of this section.
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
import joblib
import matplotlib.pyplot as plt

# SECURITY NOTE: joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
bpnn = joblib.load('water_heater_nnet.m')  # load the trained model
y_pred = bpnn.predict(x_stdtest)  # predicted labels for the test set
print('神经网络预测结果评价报告:\n', classification_report(y_test, y_pred))

# Plot the ROC curve
plt.rcParams['font.sans-serif'] = 'SimHei'  # render Chinese text
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign
# roc_curve's signature is (y_true, y_score): the ground truth comes first.
# The arguments were previously swapped, which scored the true labels against
# the predictions.
# NOTE(review): hard class predictions give only one operating point; if the
# model exposes predict_proba, passing the positive-class probability as
# y_score would give a full curve -- confirm with the model type.
fpr, tpr, thresholds = roc_curve(y_test, y_pred)  # compute FPR and TPR
plt.figure(figsize=(6, 4))  # create the canvas
plt.plot(fpr, tpr)  # draw the curve
plt.title('3109用户用水事件识别ROC曲线')  # title
plt.xlabel('FPR')  # x-axis label
plt.ylabel('TPR')  # y-axis label
plt.savefig('./用户用水事件识别ROC曲线.png')  # save the figure
plt.show()  # display the figure
# Tags: loc, plt, 停顿, sj, 洗浴, 事件, 水流量, data, 连续
# From: https://www.cnblogs.com/cl3109/p/17259298.html