进行性能测试时,需要按照生产环境中各接口的实际请求比例来分配压测请求。下面的脚本用于统计 nginx 访问日志中各接口的请求次数及占比:
# Summarise an nginx access log: per-endpoint request share plus the busiest
# seconds / minutes / 10-minute windows / hours, written to an .xls workbook.
import re

import pandas as pd

# Access-log line layout expected by this script:
#   <ip> - - [<time>] "<request>" <request_time> <status> <bytes> "<referer>" "<ua>"
obj = re.compile(
    r'(?P<ip>.*?)- - \[(?P<time>.*?)\] "(?P<request>.*?)" (?P<request_time>.*?) '
    r'(?P<status>.*?) (?P<bytes>.*?) "(?P<referer>.*?)" "(?P<ua>.*?)"')

# Keys of the dict produced by parse(), in insertion order.
_RECORD_COLUMNS = ['time', 'time_min', 'time_10min', 'time_hour', 'request']


def load_log(path):
    """Read the log file at *path* and parse it line by line.

    Returns:
        A ``(lst, error_lst)`` tuple: ``lst`` holds the dict returned by
        ``parse`` for every line it accepted, ``error_lst`` holds the stripped
        text of every line it rejected.
    """
    lst = []
    error_lst = []
    # errors="replace": a stray non-UTF-8 byte in one request should not abort
    # the whole run; the affected characters become U+FFFD.
    with open(path, mode="r", encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip()
            dic = parse(line)
            if dic:
                lst.append(dic)
            else:
                error_lst.append(line)
    return lst, error_lst


def NumIn(s):
    """Return True if *s* contains at least one digit character."""
    return any(char.isdigit() for char in s)


def parse(line):
    """Parse one access-log line into a record dict.

    The record contains:
        time        timestamp with the timezone offset removed
        time_min    first 17 characters of ``time`` (minute resolution)
        time_10min  first 16 characters of ``time`` (ten-minute bucket)
        time_hour   first 14 characters of ``time`` (hour resolution)
        request     ``"<METHOD> <path>"`` where the path has its query string
                    removed and is cut to at most four components; the fourth
                    component is dropped as well when it contains a digit.

    Returns:
        The record dict, or ``False`` when the line does not match the
        expected layout or its request field has no path token.
    """
    result = obj.match(line)
    if result is None:
        return False

    # Drop the trailing " +0800"-style offset, whatever its value.
    time = result.group("time").split(" ", 1)[0]

    request_tokens = result.group("request").split()
    if len(request_tokens) < 2:
        # e.g. "-" or an empty request line: no method/path pair to count.
        return False
    method = request_tokens[0]
    path = request_tokens[1].split("?")[0]

    # For an absolute path, segments[0] is the empty string before the
    # leading "/", so segments[4] is the fourth path component.
    segments = path.split('/')
    keep = 5
    if len(segments) > 4 and NumIn(segments[4]):
        keep = 4
    endpoint = '/'.join(segments[:keep])

    return {
        'time': time,
        'time_min': time[:17],
        'time_10min': time[:16],
        'time_hour': time[:14],
        'request': method + " " + endpoint,
    }


def _top_counts(series, limit=None):
    """Return ``[(value, count), ...]`` by descending count, at most *limit* rows."""
    counts = series.value_counts()
    if limit is not None:
        counts = counts.iloc[:limit]
    return [(value, int(count)) for value, count in counts.items()]


def _write_sheet(workbook, title, headers, rows):
    """Add a sheet named *title* with a header row followed by *rows*; return it."""
    sheet = workbook.add_sheet(title)
    for col, header in enumerate(headers):
        sheet.write(0, col, header)
    for row_idx, row in enumerate(rows, start=1):
        for col, value in enumerate(row):
            sheet.write(row_idx, col, value)
    return sheet


def analyse(lst, project, output="nginx_log.xls"):
    """Aggregate parsed records and write the statistics workbook.

    Args:
        lst: records as returned by ``load_log`` / ``parse``.
        project: pattern selecting the requests to keep; it is passed to
            ``Series.str.contains`` and is therefore treated as a regular
            expression.
        output: path of the .xls file to write.

    The workbook has five sheets: per-endpoint count and percentage, then the
    top 100 seconds, top 100 minutes, top 100 ten-minute windows and top 24
    hours by request count.
    """
    # Imported here because only the report writer needs xlwt.
    import xlwt

    df = pd.DataFrame(lst, columns=_RECORD_COLUMNS)
    if not df.empty:
        df = df[df['request'].str.contains(project)]
    total = int(df.shape[0])

    wb = xlwt.Workbook()

    request_rows = []
    for request, count in _top_counts(df['request']):
        method, url = request.split(" ", 1)
        request_rows.append((url, method, count, "%.2f%%" % (count / total * 100)))
    sheet = _write_sheet(
        wb, "url请求次数及占比",
        ["request_url", "request_type", "count", "百分比", "请求总数"],
        request_rows)
    # The grand total sits next to the first data row, in the fifth column.
    sheet.write(1, 4, total)

    _write_sheet(wb, "秒级请求数top100", ["time", "count"],
                 _top_counts(df['time'], 100))

    _write_sheet(wb, "分钟请求数top100", ["time_min", "count"],
                 [(key + ':00' + "-" + key + ':59', count)
                  for key, count in _top_counts(df['time_min'], 100)])

    # A ten-minute key ends with the tens digit of the minute, so appending
    # "0:00" / "9:59" spells out the first and last second of the window.
    _write_sheet(wb, "10分钟请求数top100", ["time10", "count"],
                 [(key + '0:00' + "-" + key + '9:59', count)
                  for key, count in _top_counts(df['time_10min'], 100)])

    _write_sheet(wb, "小时级请求数", ["timehour", "count"],
                 [(key + ':00:00' + "-" + key + ':59:59', count)
                  for key, count in _top_counts(df['time_hour'], 24)])

    wb.save(output)


if __name__ == '__main__':
    lst, error_lst = load_log(path=r"D:\Desktop\****imc.log")
    analyse(lst, project='/SVC***/')
统计结果如下:
标签:count,sheet,python,request,write,nginx,time,日志,row From: https://www.cnblogs.com/teangtang/p/17616087.html