闲来无事,写个程序编排财大课表,也算是催促自己学习
项目地址:https://github.com/PaperMisty/Tjufe-course-arrange
其中,denoise2.py程序主要将课表src.png降噪为tmp.png,再识别输出到info.txt,由于课表本身太糙,噪点也多,识别起来难免有错误,于是需要对比着自己改一下;
确保无误后保存,info2ics.py程序将info.txt编排为两份ics日历文件,一个是GMT.ics(格林威治时间),一个是Beijing.ics(北京时间),取自己需要的导入即可。
如有错误,欢迎交流。
# encoding:utf-8
# denoise2.py
"""Denoise the timetable screenshot and run OCR on the cleaned image.

Reads ``src.png``, removes isolated dark specks in two passes, stores the
result as ``tmp.png`` and passes that file to the ``ocr`` module, which
writes the recognised text to ``info.txt``.
"""
import numpy as np

# Strict denoising variant.

# Grey values below this are counted as "dark" (part of a stroke or line).
DARK_THRESHOLD = 130


def img_denoise(img, kernel, noise_num, threshold=DARK_THRESHOLD):
    """Return a copy of *img* with isolated dark pixels painted white.

    For every pixel that is not on the outermost one-pixel border, the
    dark pixels (value < *threshold*) are counted inside a window of
    ``kernel`` x ``kernel`` pixels whose top-left corner lies one pixel up
    and one pixel left of the pixel itself; the window is clipped at the
    image edge.  With ``kernel == 3`` this is the centred 3x3 neighbourhood.
    If fewer than *noise_num* dark pixels are found the pixel is set to 255,
    otherwise it keeps its value.

    Args:
        img: Image array; the first two axes are rows and columns.
        kernel: Side length of the counting window.
        noise_num: Minimum number of dark pixels in the window for the
            pixel to be kept.
        threshold: Values strictly below this count as dark.

    Returns:
        A new array of the same shape and dtype; *img* is not modified.
    """
    output = img.copy()
    height, width = img.shape[:2]

    dark = img < threshold
    if dark.ndim > 2:
        # Multi-channel input: count dark samples over all channels.
        dark = dark.reshape(height, width, -1).sum(axis=2)

    # Summed-area table: integral[r, c] == number of dark pixels in
    # dark[:r, :c].  It gives every window count with four lookups instead
    # of a Python-level loop over all pixels.
    integral = np.zeros((height + 1, width + 1), dtype=np.int64)
    integral[1:, 1:] = dark.cumsum(axis=0).cumsum(axis=1)

    rows = np.arange(1, height - 1)
    cols = np.arange(1, width - 1)
    top = (rows - 1)[:, None]
    bottom = np.minimum(rows + kernel - 1, height)[:, None]
    left = (cols - 1)[None, :]
    right = np.minimum(cols + kernel - 1, width)[None, :]

    counts = (
        integral[bottom, right]
        - integral[top, right]
        - integral[bottom, left]
        + integral[top, left]
    )

    # Border pixels are never touched; only the interior view is edited.
    interior = output[1:-1, 1:-1]
    interior[counts < noise_num] = 255
    return output


def main(src_png_name='src.png', tmp_png_name='tmp.png'):
    """Denoise *src_png_name*, save it as *tmp_png_name* and run the OCR step.

    Raises:
        FileNotFoundError: If OpenCV cannot read *src_png_name*.
    """
    # Imported here so that img_denoise stays usable without OpenCV or the
    # sibling ocr module being importable.
    import cv2
    import ocr

    # Read the image and convert it to greyscale.
    src = cv2.imread(src_png_name)
    if src is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'cannot read image: {src_png_name}')
    img = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)

    img = img_denoise(img, 3, 2)
    # Second filtering pass with a larger window.
    img = img_denoise(img, 5, 3)

    # To preview the result:
    #   cv2.imshow('img', img); cv2.waitKey(0); cv2.destroyAllWindows()

    # Store the denoised picture.
    cv2.imwrite(tmp_png_name, img)
    # Fetch the recognised text.
    response = ocr.ocr_course(tmp_png_name)
    # Tidy the text and write it to info.txt.
    ocr.info_format(response)


if __name__ == '__main__':
    main()
# encoding:utf-8
# ocr.py
"""Send the denoised timetable image to Baidu OCR and write ``info.txt``."""
import base64
import os

import pandas as pd  # not used by the code below; retained from the original module

TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'
OCR_URL = 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic'
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs forever

# NOTE(security): these credentials were hard-coded in the original script and
# are therefore public.  They remain only as a fallback so existing usage keeps
# working; set BAIDU_OCR_API_KEY / BAIDU_OCR_SECRET_KEY to use your own.
_DEFAULT_CLIENT_ID = 'CyPGn6Z0tCOxG747tru6KRb9'
_DEFAULT_CLIENT_SECRET = '2Fr6y4W5Gezp6H2RHt6OVx6uEXUuOOUz'


# Recognise the text in a picture through the Baidu OCR API.
def ocr_course(path):
    """Upload the image at *path* and return the OCR HTTP response.

    Args:
        path: Path of the image file to recognise.

    Returns:
        The ``requests`` response object of the recognition request.

    Raises:
        KeyError: If the token endpoint does not return an ``access_token``.
    """
    # Imported here so the text helpers below work without the HTTP library.
    import requests

    # client_id is the API key (AK), client_secret the secret key (SK).
    credentials = {
        'grant_type': 'client_credentials',
        'client_id': os.environ.get('BAIDU_OCR_API_KEY', _DEFAULT_CLIENT_ID),
        'client_secret': os.environ.get('BAIDU_OCR_SECRET_KEY', _DEFAULT_CLIENT_SECRET),
    }
    tokens = requests.get(TOKEN_URL, params=credentials, timeout=REQUEST_TIMEOUT).json()
    try:
        access_token = tokens['access_token']
    except KeyError:
        raise KeyError(f'token endpoint returned no access_token: {tokens!r}') from None

    # Read the picture in binary mode; the context manager closes the file.
    with open(path, 'rb') as f:
        img = base64.b64encode(f.read())

    headers = {'content-type': 'application/x-www-form-urlencoded'}
    # Fetch the recognition result.
    response = requests.post(
        OCR_URL,
        params={'access_token': access_token},
        data={"image": img},
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    return response


# Custom corrections for the schedule field.
def week_fix(str):
    """Return the text with two hard-coded OCR corrections applied.

    A leading ``-`` or ``~`` becomes ``一`` and a trailing ``欢`` becomes
    ``双``.  An empty string is returned unchanged.  The parameter keeps its
    original name for compatibility even though it shadows the builtin.
    """
    text = str
    if not text:
        return text
    if text[0] == '-' or text[0] == '~':
        text = '一' + text[1:]
    if text[-1] == '欢':
        text = text[:-1] + '双'
    return text


# Write the recognition result to info.txt.
def info_format(response):
    """Group the OCR words into course lines and write them to ``info.txt``.

    A record ends with the word that follows a word containing ``节``.
    Records with six or more space-separated pieces (counting the empty
    piece after the trailing space) start a new course; shorter ones are
    written as a continuation of the previous course.  Records too short to
    hold three fields are skipped, and words left over after the last
    complete record are not written.

    Args:
        response: Response object offering ``json()``; a falsy value makes
            the function return without touching the file.

    Returns:
        None.

    Raises:
        KeyError: If the response payload has no ``words_result`` entry.
    """
    if not response:
        return None

    payload = response.json()
    if 'words_result' not in payload:
        raise KeyError(f'OCR response has no words_result: {payload!r}')

    # Split the word stream into records.
    records = []
    pending = ''
    close_after_this = False
    for item in payload['words_result']:
        words = item['words']
        pending += words + ' '
        if close_after_this:
            records.append(pending)
            pending = ''
        close_after_this = "节" in words

    with open('info.txt', 'w', encoding='utf-8') as f:
        f.write("#示例(除了XXX其余都需要仔细校对):")
        f.write("XXXXXXXXX]课程名字 老师名字 1-17 二[5XXX]双 N3218" + '\n')
        course_prefix = ''
        for record in records:
            info_list = record.split(' ')
            if len(info_list) < 4:
                # Fewer than three real fields: nothing sensible to write.
                continue
            # Custom correction of the schedule field.
            info_list[-3] = week_fix(info_list[-3])
            if len(info_list) >= 6:
                # A new course: remember its name and teacher for any
                # continuation record that follows.
                course_prefix = info_list[1] + ' ' + info_list[-5]
            fields = [course_prefix] + info_list[-4:-1]
            f.write(' '.join(fields) + ' ' + '\n')
# info2ics.py
# Beijing time is GMT+8:00.
"""Convert the proofread ``info.txt`` into two iCalendar files."""
import uuid
from datetime import datetime, timedelta, timezone

# 2023-02-27 is a Monday; week 1 / weekday 一 maps onto this date.
SEMESTER_START = datetime(2023, 2, 27)
CLASS_DURATION = timedelta(hours=1, minutes=30)

_WEEKDAY_NUMBERS = {'一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '日': 7, '天': 7}

# Locations starting with one of these letters use the first table below and
# a base time ten minutes past the hour; all others use the second table.
_OFFSET_BUILDINGS = frozenset('CDEFJM')
# start period -> (hours, minutes) added to the base time
_OFFSET_BUILDING_SLOTS = {
    '1': (0, 0), '3': (2, 10), '5': (5, 20),
    '7': (7, 30), '9': (9, 50), '11': (11, 40),
}
_DEFAULT_SLOTS = {
    '1': (0, 0), '3': (2, 0), '5': (5, 20),
    '7': (7, 20), '9': (10, 0), '11': (11, 50),
}

# Parsed courses; filled by main() and used by arrange() when no explicit
# course list is passed.
structs = []


def create_ics_file(events, calendar_name):
    """Write *events* to *calendar_name* as an iCalendar file.

    Args:
        events: Iterable of dicts with ``start_time``/``end_time``
            (datetime) and ``summary``/``description``/``location``.
        calendar_name: Path of the file to write.
    """
    # DTSTAMP carries a "Z" suffix, so it has to be formatted from UTC time.
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")

    # Calendar header.
    chunks = ['BEGIN:VCALENDAR\nVERSION:2.0\nPRODID:-//hacksw/handcal//NONSGML v1.0//EN\n']
    for event in events:
        chunks.append(
            'BEGIN:VEVENT\n'
            f'UID:{uuid.uuid4().hex}\n'  # unique identifier per event
            f'DTSTAMP:{stamp}\n'
            f'DTSTART:{event["start_time"].strftime("%Y%m%dT%H%M%S")}\n'
            f'DTEND:{event["end_time"].strftime("%Y%m%dT%H%M%S")}\n'
            f'SUMMARY:{event["summary"]}\n'
            f'DESCRIPTION:{event["description"]}\n'
            f'LOCATION:{event["location"]}\n'
            'END:VEVENT\n'
        )
    # Calendar footer.
    chunks.append('END:VCALENDAR\n')

    with open(calendar_name, 'w', encoding='utf-8') as f:
        f.write(''.join(chunks))


def parse_course_line(line):
    """Parse one ``info.txt`` line into a course dict.

    The expected fields are: ``code]name teacher weeks schedule location``,
    for example ``X]Maths Li 1-17 二[5-6节]双 N3218``.

    Raises:
        ValueError: If the line has fewer than five fields.
    """
    fields = line.split()  # whitespace split also drops the trailing newline
    if len(fields) < 5:
        raise ValueError(f'expected 5 fields, got {len(fields)}: {line!r}')

    course = {}
    # Course name: the text after the first "]" (whole field if there is none).
    name_parts = fields[0].split(']')
    course['summary'] = name_parts[1] if len(name_parts) > 1 else name_parts[0]
    # Teacher.
    course['description'] = fields[1]
    # Week range, either "a-b" or a single week.
    bounds = fields[2].split('-')
    course['week_start'] = int(bounds[0])
    course['week_end'] = int(bounds[1]) if len(bounds) > 1 else course['week_start']
    # Schedule: weekday character, one bracket, then the periods.
    schedule = fields[3]
    course['week_time'] = schedule[:1]
    slot = schedule[2:]
    course['start_course'] = slot.split('-')[0]
    marker = slot[-1:]
    if marker == "单":
        course['step'], course['week_parity'] = 2, 0
    elif marker == "双":
        course['step'], course['week_parity'] = 2, 1
    else:
        course['step'], course['week_parity'] = 1, 0
    # Location.
    course['location'] = fields[4]
    return course


def load_structs(path):
    """Read *path* and return the parsed courses, skipping the first line and blank lines."""
    with open(path, 'r', encoding='utf-8') as f:
        contents = f.readlines()
    # The first line is the commented example written by the OCR step.
    return [parse_course_line(content) for content in contents[1:] if content.strip()]


def _teaching_weeks(struct):
    """Return the week numbers on which the course takes place."""
    first, last, step = struct['week_start'], struct['week_end'], struct['step']
    if step == 2:
        # week_parity 0 means odd weeks, 1 means even weeks.  Filtering by the
        # real parity stays correct even when the range starts on an even week.
        remainder = 0 if struct['week_parity'] else 1
        return [week for week in range(first, last + 1) if week % 2 == remainder]
    return list(range(first + struct['week_parity'], last + 1, step))


def arrange(time_block, courses=None, semester_start=SEMESTER_START):
    """Expand every course into one event per teaching week.

    Args:
        time_block: Hour of the day at which the first period starts.
        courses: Course dicts as produced by ``parse_course_line``; defaults
            to the module-level ``structs`` list.
        semester_start: Datetime at midnight of the Monday of week 1.

    Returns:
        List of event dicts with ``start_time``, ``end_time``, ``summary``,
        ``description`` and ``location``.

    Raises:
        ValueError: If a course has an unknown weekday or start period.
    """
    if courses is None:
        courses = structs

    events = []
    for struct in courses:
        location = struct['location']
        if location[:1] in _OFFSET_BUILDINGS:
            base_minute, slots = 10, _OFFSET_BUILDING_SLOTS
        else:
            base_minute, slots = 0, _DEFAULT_SLOTS

        try:
            hours, minutes = slots[struct['start_course']]
        except KeyError:
            raise ValueError(
                f"unknown start period {struct['start_course']!r} for {struct['summary']!r}"
            ) from None
        try:
            weekday = _WEEKDAY_NUMBERS[struct['week_time']]
        except KeyError:
            raise ValueError(
                f"unknown weekday {struct['week_time']!r} for {struct['summary']!r}"
            ) from None

        first_period = semester_start + timedelta(hours=time_block, minutes=base_minute)
        for week in _teaching_weeks(struct):
            start_time = first_period + timedelta(
                days=7 * (week - 1) + weekday - 1, hours=hours, minutes=minutes
            )
            events.append({
                'start_time': start_time,
                'end_time': start_time + CLASS_DURATION,
                'summary': struct['summary'],
                'description': struct['description'],
                'location': location,
            })
    print(f'课程总数:{len(events)}')
    return events


def main(path='info.txt'):
    """Read *path* and write ``GMT.ics`` and ``Beijing.ics``."""
    structs[:] = load_structs(path)
    # NOTE(review): file names and hour offsets are kept from the original
    # script: GMT.ics starts the day at 08:xx, Beijing.ics at 00:xx, and the
    # times are written without a zone suffix.  Confirm which file your
    # calendar application interprets correctly.
    GMT_events = arrange(8)
    Beijing_events = arrange(0)
    create_ics_file(GMT_events, "GMT.ics")
    create_ics_file(Beijing_events, "Beijing.ics")


if __name__ == '__main__':
    main()
src.png图片示例:
标签:info,week,财大,sub,课表,start,编排,time,struct
From: https://www.cnblogs.com/PaperMisty/p/17151978.html