要做的事:
1:获取日志内标红的数据(ipython模式正则捕获如下图)
2:转换相关选项的md5和反编码值
3:根据dm值和上个小时的年月日(如:20131027)(需要考虑凌晨零点几分的时对应的目录是昨天日期)找到相应目录,如不存在则创建
4:新建以上小时格式命名的文本(如:20131027050)写入数据
下面是代码:
#!/usr/bin/python #coding:utf-8 """ Logs parse and format than write to the rigth dir Author by Qfeian @20131130 """ import os import re from urllib import unquote import hashlib import datetime #from time import sleep def hour(n=0): """Timeformat for hours the default is the current time """ now = datetime.datetime.now() h = now + datetime.timedelta(hours=n) return h.strftime('%Y%m%d'), h.strftime('%Y%m%d%H') def hash_str(str): """md5 encryption""" m = hashlib.md5() m.update(str) return m.hexdigest() def unquote_str(str): """urldecode by unquote""" s = unquote(str) return s def dir_query(str): """Check the dir if not exist create it """ s = os.path.isdir(str) if not s: os.makedirs(str) def path_query(paths,str): """如果第一次创建文本,则写入第一行内容""" s1 = os.path.isfile(paths) if not s1: f1 = open(paths,'a') f1.write(str) f1.close() def main(): log_dir = "/usr/local/nginx/logs/click.master.com_log/" #需要分析日志的路径 S_dir = "/data/app/click.master.com/logs/dlogs/" #分析后日志保存路径 str1 = "tm #*# uid #*# os #*# br #*# ip #*# ul #*# pt #*# pm #*# tl #*# co\n" """"format last hour time """ (D, H) = hour(-1) log_name = 'click.master.com_access.log-' + H #上小时nginx日志文件 log_path = os.path.join(log_dir, log_name) logfile = open(log_path, 'r') crg = re.compile(ur"(^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" ur".*&dm=(.*?)" ur"&ul=(.*?)" ur"&tm=(.*?)" ur"&os=(.*?)" ur"&br=(.*?)" ur"&.*?&uid=(\d+)" ur"&pt=(.*?)" ur"&tl=(.*?)" ur"&co=(.*?)&") for lines in logfile.readlines(): """ check up on the nginx logs by regular""" a = crg.findall(lines) """ if matching write to the logfile """ if a: (ip, dm, ul, tm, OS, br, uid, pt, tl, co) = a[0] pt = unquote_str(pt) tl = unquote_str(tl) co = unquote_str(co) pm = hash_str(pt) s = "%s #*# %s #*# %s #*# %s #*# %s #*# %s #*# %s #*# %s #*# %s #*# %s\n" % \ (tm, uid, OS, br, ip, ul, pt, pm, tl, co) W_log = os.path.join(S_dir, dm, D, H) W_log = W_log + '.log' W_dir = os.path.dirname(W_log) """ check logdir and logpath""" dir_query(W_dir) path_query(W_log,str1) """ write logs """ f = open(W_log, 'a') f.write(s) # print s # print dm, "\n" # sleep(1) f.close() logfile.close() if __name__ == "__main__": main()
标签:log,python,ur,str,path,日志,解析,os,dir From: https://www.cnblogs.com/oceaning/p/17146486.html