# Filter entities by type and by creation date (on or after 2022-01-01).
# Adapted from shaixuanleixingandbiaoti.py.
class ShaiXuanLeiXingAndBiaoTi:
    """Line-based filter for a text file holding a list of entities.

    The file is processed line by line (not parsed as JSON).  Each entity is
    expected to look like the sample shown above ``handleOneShiTi``: a ``{``
    line, a ``"data": {`` line, the data fields, and then sibling fields such
    as ``"dataType"``.

    State kept on the instance:
        read_list        -- all lines of the input file
        read_list_len    -- number of lines in ``read_list``
        mubiao_list      -- lines that are kept
        sheqi_list       -- lines that are discarded
        end_num          -- index in ``read_list`` of the next line to process
        is_exist_ziduan  -- True once the filter field was seen in the file
    """

    def __init__(self, file_name):
        """Read ``file_name`` and initialise the empty filter state."""
        self.file_name = file_name
        self.mubiao_list = []
        self.sheqi_list = []
        self.read_list = self.readText(self.file_name)
        # Also set here (not only in handleFile) so that handleOneShiTi and
        # xiangxia can be called directly without an AttributeError.
        self.read_list_len = len(self.read_list)
        self.end_num = 0
        # Whether the filter field occurs in the file; set to True on first hit.
        self.is_exist_ziduan = False

    def readText(self, file_name):
        """Return every line of ``file_name`` (UTF-8) as a list of strings."""
        with open(file=file_name, mode='r', encoding="utf-8") as f:
            return f.readlines()

    def getDataType(self, datatype_hang):
        """Extract the field value from a ``"name": value,`` line.

        Takes the text between the first and second ``:``, cuts it at the
        first ``,`` and strips surrounding whitespace.  Raises IndexError when
        the line contains no ``:``.
        """
        datatype_list = datatype_hang.split(":")
        print("datatype_list:")
        print(datatype_list)
        ziduan_zhi_list = datatype_list[1].split(",")
        print("ziduan_zhi_list:")
        print(ziduan_zhi_list)
        ziduan_zhi = ziduan_zhi_list[0].strip()
        print("ziduan_zhi:")
        print(ziduan_zhi)
        print(type(ziduan_zhi))
        return ziduan_zhi

    def getDeleteDataList(self):
        """Pop the head of the current entity off ``self.mubiao_list``.

        Lines are removed from the end of ``self.mubiao_list`` up to and
        including the nearest ``"data": {`` line, plus the line right before
        it (the entity's opening brace) when there is one.

        Returns:
            The removed lines, last line first.

        Raises:
            IndexError: if ``self.mubiao_list`` runs out before a
                ``"data": {`` line is found.
        """
        print("处理前self.mubiao_list个数")
        print(len(self.mubiao_list))
        print(self.mubiao_list)
        zhongjian_list = []
        num = 0
        while True:
            num += 1
            print("删除目标,保存数据到中间列表中,循环处理第%s次" % num)
            if not self.mubiao_list:
                raise IndexError(
                    'no \'"data": {\' line found in the kept lines; '
                    'cannot locate the start of the entity to discard'
                )
            zuihouyixiang = self.mubiao_list.pop()
            print("zuihouyixiang:")
            print(zuihouyixiang)
            zhongjian_list.append(zuihouyixiang)
            # The '"data": {' line marks the top of the entity body.
            if zuihouyixiang.strip() == '"data": {':
                print("zuihouyixiangdai{")
                print(zuihouyixiang)
                break
        # Also take the line before '"data": {' (the entity's opening brace).
        # Guarded: the marker may have been the very first kept line.
        if self.mubiao_list:
            zhongjian_list.append(self.mubiao_list.pop())
        print("处理后self.mubiao_list个数")
        print(len(self.mubiao_list))
        print("zhongjian_list个数")
        print(len(zhongjian_list))
        return zhongjian_list

    def getSheQiList(self, zhongjian_list):
        """Append ``zhongjian_list`` to ``self.sheqi_list`` in reverse order.

        ``zhongjian_list`` is emptied in the process.
        """
        print("zhongjian_list:")
        print(zhongjian_list)
        for _ in range(len(zhongjian_list)):
            self.sheqi_list.append(zhongjian_list.pop())
        print("self.sheqi_list:")
        print(self.sheqi_list)

    def xiangxia(self):
        """Discard lines from ``self.end_num`` down to the first ``}`` line.

        Looks at no more than 10 lines.  Every inspected line is appended to
        ``self.sheqi_list``.

        Returns:
            The number of loop steps taken (1..10); the caller adds it to
            ``self.end_num``.  Steps past the end of the file are counted too.
        """
        k = 0
        for k in range(1, 11):
            xiabiao = self.end_num + k - 1
            if xiabiao >= self.read_list_len:
                continue
            print("处理到self.read_list中第%s下表的的内容" % str(xiabiao))
            one_hang = self.read_list[xiabiao]
            self.sheqi_list.append(one_hang)
            if "}" in one_hang.strip():
                print("one_hang},")
                print(one_hang)
                break
        print("k:")
        print(k)
        return k

    def writeListToTxt(self, file_name, list_data):
        """Write the strings in ``list_data`` to ``file_name`` (UTF-8)."""
        with open(file_name, "w", encoding="utf-8") as f:
            f.writelines(list_data)

    # NOTE: the parameter name ``str`` shadows the builtin inside this method;
    # it is kept because callers pass it by keyword.
    def writeStrToTxt(self, file_name, str):
        """Write the string ``str`` to ``file_name`` (UTF-8)."""
        with open(file_name, "w", encoding="utf-8") as f:
            f.write(str)

    @staticmethod
    def _removeTxtSuffix(file_name):
        """Return ``file_name`` without a trailing ``.txt`` (if present)."""
        file_name = str(file_name)
        if file_name.endswith(".txt"):
            return file_name[:-len(".txt")]
        return file_name

    def _scanUntilMismatch(self, start, ziduan, ziduanzhi, hit_template):
        """Keep lines from ``start`` until a non-matching field value is hit.

        Every scanned line is appended to ``self.mubiao_list`` and advances
        ``self.end_num``, except a line that contains ``ziduan`` with a value
        different from ``ziduanzhi``: the scan stops there and leaves
        ``self.end_num`` pointing at that line.
        """
        for i in range(start, self.read_list_len):
            print("处理到第%s行的内容" % str(i))
            one_hang = self.read_list[i]
            if ziduan.lower() in one_hang.lower():
                self.is_exist_ziduan = True
                print(hit_template % (ziduan, ziduanzhi, i))
                print(one_hang)
                data_type_zhi = self.getDataType(datatype_hang=one_hang)
                print(data_type_zhi)
                if data_type_zhi != ziduanzhi:
                    break
            self.mubiao_list.append(one_hang)
            self.end_num = self.end_num + 1

    # Sample of one entity as it appears in the input file:
    # {
    #     "data": {
    #         "tagList": [
    #             "测试"
    #         ],
    #         "roomLiveTitle": "直播间标题-大会直播",
    #         "coverOne": "\/tojoy\/tojoyClould\/backstageSystem\/image\/1633680869417.jpg",
    #         "screenShot": "\/tojoy\/tojoyClould\/serverUpload\/202207\/14\/image\/1657783758511.jpg",
    #         "roomLiveId": 4003879,
    #         "coverTwo": "\/tojoy\/tojoyClould\/backstageSystem\/image\/1633680876038.jpg",
    #         "status": 4,
    #         "videoPlayUrl": "http:\/\/1259323955.vod2.myqcloud.com\/685cdfeevodcq1259323955\/b520d1e2387702293080090030\/f0.mp4?oss-cn-beijing.aliyuncs.com"
    #     },
    #     "dataIndex": 1,
    #     "dataType": 4,
    #     "dataSource": 3
    # },
    def handleOneShiTi(self, ziduan="dataType", ziduanzhi="4"):
        """Split ``self.read_list`` into kept and discarded lines.

        Entities whose ``ziduan`` value equals ``ziduanzhi`` end up in
        ``self.mubiao_list``; all other entities are moved to
        ``self.sheqi_list``.  ``self.is_exist_ziduan`` is set when the field
        occurs anywhere in the file.

        The field name is matched case-insensitively as a substring of the
        line.  Expects ``self.end_num`` to be 0 on entry.
        """
        self._scanUntilMismatch(0, ziduan, ziduanzhi,
                                "遇到第一个%s值不是%s的%s行的内容")
        if not self.is_exist_ziduan:
            print("文件中不存在字段【%s】" % ziduan)
            return

        while True:
            print("处理到self.mubiao_list第%s个下标" % str(self.end_num))
            print("从断点下标%s开始处理" % str(self.end_num))
            # The scan reached the end of the file without a mismatch, so
            # there is nothing to discard.  This guard now covers the first
            # scan as well; without it the last kept entity was thrown away
            # when every entity matched.
            if self.end_num >= self.read_list_len:
                print("从断点下标%s超过超过数列%s长度,终止循环"
                      % (str(self.end_num), str(self.read_list_len)))
                break

            # Move the already-kept head of the rejected entity to the
            # discard list (restoring file order) ...
            zhongjian_list = self.getDeleteDataList()
            print("self.mubiao_list_hou:")
            print(self.mubiao_list)
            print(len(self.mubiao_list))
            self.getSheQiList(zhongjian_list)
            # ... then discard its tail, down to the closing brace.
            k = self.xiangxia()
            self.end_num = self.end_num + k
            print("接着从断点 %s行开始处理" % str(self.end_num))

            if self.end_num >= self.read_list_len:
                break
            self._scanUntilMismatch(self.end_num, ziduan, ziduanzhi,
                                    "遇到一个%s值不是%s的%s行的内容")

    def handleFile(self, ziduan="dataType", ziduanzhi="4"):
        """Filter the loaded file by ``ziduan`` == ``ziduanzhi`` and save it.

        Writes the kept lines to ``mubiao_<ziduan>_<ziduanzhi>.txt`` and the
        discarded lines to ``sheqi_not_<ziduan>_<ziduanzhi>.txt`` in the
        current working directory.

        Returns:
            The name of the kept-lines file, or None when the field does not
            occur in the input (no files are written in that case).
        """
        print("self.read_list:")
        print(self.read_list)
        self.read_list_len = len(self.read_list)
        print("self.read_list_len:")
        print(self.read_list_len)

        # Start from a clean state so that calling handleFile more than once
        # does not append to the results of an earlier run.
        self.mubiao_list = []
        self.sheqi_list = []
        self.end_num = 0
        self.is_exist_ziduan = False

        self.handleOneShiTi(ziduan=ziduan, ziduanzhi=ziduanzhi)
        if not self.is_exist_ziduan:
            print("文件中不存在字段【%s】,不输出目标文件" % ziduan)
            return None

        print(self.read_list_len)
        print(len(self.mubiao_list))
        print(len(self.sheqi_list))
        mubiao_file = "mubiao_%s_%s.txt" % (ziduan, ziduanzhi)
        self.writeListToTxt(file_name=mubiao_file,
                            list_data=self.mubiao_list)
        self.writeListToTxt(file_name="sheqi_not_%s_%s.txt" % (ziduan, ziduanzhi),
                            list_data=self.sheqi_list)
        return mubiao_file

    def firstListDaoXuTOSecondList(self, first_list, second_list):
        """Append ``first_list`` to ``second_list`` in reverse order.

        ``first_list`` is emptied in the process.  Returns ``second_list``.
        """
        for _ in range(len(first_list)):
            second_list.append(first_list.pop())
        return second_list

    def firstListZhengXuTOSecondList(self, first_list, second_list):
        """Append ``first_list`` to ``second_list`` in order; return the latter."""
        second_list.extend(first_list)
        return second_list

    def getOneShiTiByOneZiduan(self, one_list, quanji_list):
        """Return all lines of the entity that contains a given line.

        Args:
            one_list: ``[index, ...]``; only ``one_list[0]``, the index of a
                line inside the entity, is used.
            quanji_list: all lines of the file.

        Returns:
            The entity's lines in file order: from the line before the nearest
            ``"data": {`` above the index (when there is one) down to the
            first line containing ``}`` at or after the next ``dataType``
            line.  Both searches stop at the list boundaries instead of
            wrapping around or raising IndexError.
        """
        quanji_list_len = len(quanji_list)
        qidian = int(one_list[0])

        # Upper half: walk upwards to the '"data": {' line.
        one_duixiang_qian_list = []
        xiabiao = qidian
        zhaodao_data = False
        while 0 <= xiabiao < quanji_list_len:
            one_duixiang_qian_list.append(quanji_list[xiabiao])
            if '"data": {' in str(quanji_list[xiabiao]).lower():
                zhaodao_data = True
                break
            xiabiao -= 1
        # Include the entity's opening line above the marker.  Skipped when
        # the marker is at index 0: index -1 would be the file's LAST line.
        if zhaodao_data and xiabiao > 0:
            one_duixiang_qian_list.append(quanji_list[xiabiao - 1])
        print(one_duixiang_qian_list)

        # Lower half: walk downwards to the first "}" at or after "dataType".
        one_duixiang_hou_list = []
        biaozhi = False  # set once the "dataType" line has been seen
        for xiabiao in range(qidian + 1, quanji_list_len):
            one_duixiang_hou_list.append(quanji_list[xiabiao])
            hang = str(quanji_list[xiabiao]).lower()
            if "datatype" in hang:
                print("已经找到第一个节点dataType,再往下找到}就终止")
                biaozhi = True
            if biaozhi and "}" in hang:
                break
        print(one_duixiang_hou_list)

        # The upper half was collected bottom-up; flip it back to file order.
        one_duixiang_list = one_duixiang_qian_list[::-1] + one_duixiang_hou_list
        print("one_duixiang_list:")
        print(one_duixiang_list)
        return one_duixiang_list

    def shaiXuanRoomLiveTitle(self, one_list):
        """Reduce ``one_list[1]`` from a raw line to a bare field value.

        The value is the text between the first and second ``:`` of the line,
        with surrounding spaces, newline, comma and double quotes stripped
        (in that order), cut at the first space.  For a line such as
        ``"createTime": "2022-03-05 10:20:30",`` this yields ``2022-03-05``.

        ``one_list`` is modified in place and returned; ``one_list[0]`` (the
        line index) is left untouched.  Raises IndexError when the line has
        no ``:``.
        """
        print("筛选前的内容:【%s】" % one_list)
        print("写筛选规则")
        print("不处理下标")
        print("只处理行内容")
        hang_content_list = one_list[1].split(":")
        print("hang_content_list:")
        print(hang_content_list)
        # Take the value only, not the field name.
        hang_ziduanzhi = hang_content_list[1]
        print("hang_ziduanzhi:")
        print(hang_ziduanzhi)
        hang_ziduanzhi = hang_ziduanzhi.strip(" ")
        print("去掉前后空格的hang_ziduanzhi:")
        hang_ziduanzhi = hang_ziduanzhi.strip("\n")
        print("去掉换行符的hang_ziduanzhi:")
        print(hang_ziduanzhi)
        hang_ziduanzhi = hang_ziduanzhi.strip(",")
        print("去掉逗号的hang_ziduanzhi:")
        print(hang_ziduanzhi)
        hang_ziduanzhi = hang_ziduanzhi.strip('"')
        print("去掉引号的hang_ziduanzhi:")
        print(hang_ziduanzhi)
        print("这里可以继续加规则...")
        print("加一些规则")
        # Keep only the part before the first space.
        hang_ziduanzhi = hang_ziduanzhi.split(" ")[0]
        print("最终的hang_ziduanzhi:")
        print(hang_ziduanzhi)
        one_list[1] = hang_ziduanzhi
        print("筛选后的内容:【%s】" % one_list)
        return one_list

    def getRoomLiveTitle(self, ziduan="dataType", ziduanzhi="4",
                         quchongziduan="roomLiveTitle",
                         min_date="2022-01-01"):
        """Filter by type, then drop entities whose field value is too old.

        First runs ``handleFile(ziduan, ziduanzhi)``.  In the resulting file,
        every line containing ``quchongziduan`` (case-insensitive substring)
        is reduced to its value with ``shaiXuanRoomLiveTitle``.  Entities
        whose value compares lower than ``min_date`` are written to
        ``<kept file>_chongfu.txt``; the kept file with those entities removed
        is written to ``<kept file>_quchong.txt``.  Nothing further is written
        when no entity is rejected.

        The comparison is a plain string comparison, which orders ISO
        ``YYYY-MM-DD`` dates correctly.
        NOTE(review): the ``chongfu``/``quchong`` ("duplicate"/"dedupe")
        wording in names and messages appears to be inherited from the script
        this one was adapted from -- here it means "rejected"/"filtered".

        Args:
            ziduan: name of the type field used for the first filter.
            ziduanzhi: required value of that field, as a string.
            quchongziduan: name of the field holding the value to compare.
            min_date: lowest value that is still kept.
        """
        mubiao_file = self.handleFile(ziduan=ziduan, ziduanzhi=ziduanzhi)
        if mubiao_file is None:
            print("没有输出目标文件")
            return
        print("文件中存在字段【%s】,目标文件为【%s】" % (ziduan, mubiao_file))

        with open(file=mubiao_file, mode='r', encoding="utf-8") as f:
            mubiaofile_list = f.readlines()

        # [index, line] for every line that mentions the field.
        all_roomLiveTitle_list = [
            [i, hang] for i, hang in enumerate(mubiaofile_list)
            if quchongziduan.lower() in str(hang).lower()
        ]
        print("打印所有包含%s的下标和行内容:" % quchongziduan)
        for one in all_roomLiveTitle_list:
            print(one)

        # Values that pass the cut-off.
        roomLiveTitle_wuchongfu_list = []
        # [index, value] of entries that fail the cut-off.
        roomLiveTitle_chongfu_with_xiaobiao_list = []
        for one_list in all_roomLiveTitle_list:
            one_list = self.shaiXuanRoomLiveTitle(one_list)
            if one_list[1] >= min_date:
                roomLiveTitle_wuchongfu_list.append(one_list[1])
            else:
                roomLiveTitle_chongfu_with_xiaobiao_list.append(one_list)

        print("无重复的包含%s的行的内容:" % quchongziduan)
        print("-" * 60)
        for one in roomLiveTitle_wuchongfu_list:
            print(one)
        print("-" * 60)
        print("重复的包含%s的下标和行的内容:" % quchongziduan)
        for one in roomLiveTitle_chongfu_with_xiaobiao_list:
            print(one)
        print("roomLiveTitle_chongfu_with_xiaobiao_list:")
        print(roomLiveTitle_chongfu_with_xiaobiao_list)

        if not roomLiveTitle_chongfu_with_xiaobiao_list:
            print("无重复【%s】" % quchongziduan)
            return

        print("获取重复的内容:")
        all_chongfu_shiti_list = []        # lines of every rejected entity
        all_one_shiti_list_str_list = []   # each rejected entity as one string
        for one_list in roomLiveTitle_chongfu_with_xiaobiao_list:
            print("循环获取内容")
            one_shiti_list = self.getOneShiTiByOneZiduan(
                one_list=one_list, quanji_list=mubiaofile_list)
            one_chongfu_shiti_list_str = "".join(one_shiti_list)
            print("根据【%s】的内容获取到其对应的对象内容【%s】"
                  % (one_list, one_chongfu_shiti_list_str))
            all_one_shiti_list_str_list.append(one_chongfu_shiti_list_str)
            all_chongfu_shiti_list.extend(one_shiti_list)

        print("all_chongfu_shiti_list:")
        print(all_chongfu_shiti_list)
        print("所有需要去掉的重复的行的内容:")
        for one in all_chongfu_shiti_list:
            print(one)
        print("所有需要去掉的重复的总行数:")
        print(len(all_chongfu_shiti_list))

        # str.strip(".txt") removes any of the characters '.', 't', 'x' from
        # both ends, which mangles names such as "mubiao_x_text.txt"; remove
        # the suffix explicitly instead.
        jichu_name = self._removeTxtSuffix(mubiao_file)
        self.writeListToTxt(file_name=jichu_name + "_chongfu" + ".txt",
                            list_data=all_chongfu_shiti_list)

        # Cut every rejected entity out of the full text of the kept file.
        mubiaofile_list_str = "".join(mubiaofile_list)
        for one in all_one_shiti_list_str_list:
            mubiaofile_list_str = mubiaofile_list_str.replace(one, "")
            print("替换【%s】为空,即去掉该内容" % one)
        print("这个就是把去重后的内容写入到mubiao_dataType_4_quchong.txt文件中")
        self.writeStrToTxt(file_name=jichu_name + "_quchong" + ".txt",
                           str=mubiaofile_list_str)


if __name__ == '__main__':
    # Example run.
    # NOTE(review): the field names below look like placeholder values --
    # replace them with the real type field / date field before use.
    file_name = "new 2.txt"
    ziduan = "dataType2131"
    ziduanzhi = "5"
    quchongziduan = 'ewqeqe'
    sx = ShaiXuanLeiXingAndBiaoTi(file_name)
    sx.getRoomLiveTitle(ziduan=ziduan, ziduanzhi=ziduanzhi,
                        quchongziduan=quchongziduan)
# Tags: logic, conditionals, hang, self, list, field, mubiao, print, ziduanzhi
# From: https://www.cnblogs.com/jingzaixin/p/16615698.html