1.安装模块
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple huaweicloudsdkcdn
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple requests
(说明:datetime 属于 Python 标准库,无需通过 pip 安装;脚本中下载日志文件用到的是 requests 模块。)
2.获取前一天日志文件
import glob
import gzip
import json
import os
import posixpath
import shutil
from datetime import datetime, timedelta
from urllib.parse import urlencode, urlparse

import requests
from huaweicloudsdkcdn.v2 import CdnClient, ShowLogsRequest
from huaweicloudsdkcdn.v2.region.cdn_region import CdnRegion
from huaweicloudsdkcore.auth.credentials import GlobalCredentials
from huaweicloudsdkcore.exceptions import exceptions


def _build_client():
    """Build the CDN client used for every ShowLogs page request."""
    # Placeholders: fill in the account's access key / secret key before running.
    ak = ''
    sk = ''
    credentials = GlobalCredentials(ak, sk)
    return (
        CdnClient.new_builder()
        .with_credentials(credentials)
        .with_region(CdnRegion.value_of("cn-north-1"))
        .build()
    )


def _download_log(log_url, target_dir):
    """Download one log archive into *target_dir*, named after the URL path."""
    # Take the last path component instead of a fixed split('/') index so the
    # name is still correct if the URL depth changes; the query string
    # (everything after '?') is dropped by urlparse.
    log_filename = posixpath.basename(urlparse(log_url).path)
    if not log_filename:
        return
    log_path = os.path.join(target_dir, log_filename)
    with requests.get(log_url, stream=True, timeout=60) as r:
        # Fail here rather than saving an HTTP error page as a ".gz" file
        # that would only blow up later during decompression.
        r.raise_for_status()
        with open(log_path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)


def _merge_gz_files(pattern, merged_log_path, remove_sources):
    """Decompress every archive matching *pattern* into *merged_log_path*."""
    with open(merged_log_path, 'wb') as merged_file:
        # Sorted so the merge order is deterministic (glob order is arbitrary).
        for gz_file in sorted(glob.glob(pattern)):
            with gzip.open(gz_file, 'rb') as f_in:
                shutil.copyfileobj(f_in, merged_file)
            if remove_sources:
                os.remove(gz_file)


def download_and_extract_logs(domain, start_time, output_dir, dir_time):
    """Download one day of CDN logs for *domain* and merge them into one file.

    Files are stored as ``output_dir/domain/dir_time/<file>``. After all pages
    have been fetched, every ``*.gz`` in that directory is decompressed into
    ``<domain>_<dir_time>.log`` and the archive is deleted.

    Args:
        domain: Domain name assigned to ``ShowLogsRequest.domain_name``.
        start_time: Value assigned to ``ShowLogsRequest.start_time``; ``main``
            passes local midnight as a Unix timestamp in milliseconds.
        output_dir: Root directory for downloaded logs.
        dir_time: Date label used for the sub-directory and merged file name.

    Raises:
        requests.RequestException: If a log archive cannot be downloaded.
    """
    date_dir = os.path.join(output_dir, domain, str(dir_time))
    os.makedirs(date_dir, exist_ok=True)

    # The client does not depend on the page, so build it once.
    client = _build_client()

    page_number = 1
    while True:
        request = ShowLogsRequest()
        request.domain_name = domain
        request.start_time = start_time
        request.page_size = 60
        request.page_number = page_number
        request.enterprise_project_id = "0"
        try:
            response = client.show_logs(request)
        except exceptions.ClientRequestException as e:
            print(e.status_code)
            print(e.request_id)
            print(e.error_code)
            print(e.error_msg)
            # Stop paging: there is no response to read, and retrying the
            # same page forever would never terminate.
            break

        logs = json.loads(str(response)).get('logs') or []
        if not logs:
            break

        for log in logs:
            log_url = log.get('link')
            if log_url:
                _download_log(log_url, date_dir)
        page_number += 1

    # Merge and decompress all of the day's files for this domain;
    # storage layout: domain/date/file.
    merged_log_path = os.path.join(date_dir, f'{domain}_{dir_time}.log')
    _merge_gz_files(os.path.join(date_dir, '*.gz'), merged_log_path, True)


def main():
    """Fetch yesterday's logs for every configured domain."""
    domains = ["www.test.cn"]
    output_dir = 'cdn_logs'

    yesterday = datetime.now() - timedelta(days=1)
    dir_time = yesterday.strftime('%Y-%m-%d')
    # Naive local midnight of yesterday, converted to epoch milliseconds.
    midnight_yesterday = datetime(
        year=yesterday.year, month=yesterday.month, day=yesterday.day
    )
    start_time = int(midnight_yesterday.timestamp() * 1000)

    for domain in domains:
        download_and_extract_logs(domain, start_time, output_dir, dir_time)


if __name__ == "__main__":
    main()
3.区分国内外访问IP
import glob
import gzip
import json
import os
import posixpath
import shutil
from datetime import datetime, timedelta
from urllib.parse import urlencode, urlparse

import requests
from huaweicloudsdkcdn.v2 import CdnClient, ShowLogsRequest
from huaweicloudsdkcdn.v2.region.cdn_region import CdnRegion
from huaweicloudsdkcore.auth.credentials import GlobalCredentials
from huaweicloudsdkcore.exceptions import exceptions


def _build_client():
    """Build the CDN client used for every ShowLogs page request."""
    # Placeholders: fill in the account's access key / secret key before running.
    ak = ''
    sk = ''
    credentials = GlobalCredentials(ak, sk)
    return (
        CdnClient.new_builder()
        .with_credentials(credentials)
        .with_region(CdnRegion.value_of("cn-north-1"))
        .build()
    )


def _download_log(log_url, target_dir):
    """Download one log archive into *target_dir*, named after the URL path."""
    # Take the last path component instead of a fixed split('/') index so the
    # name is still correct if the URL depth changes; the query string
    # (everything after '?') is dropped by urlparse.
    log_filename = posixpath.basename(urlparse(log_url).path)
    if not log_filename:
        return
    log_path = os.path.join(target_dir, log_filename)
    with requests.get(log_url, stream=True, timeout=60) as r:
        # Fail here rather than saving an HTTP error page as a ".gz" file
        # that would only blow up later during decompression.
        r.raise_for_status()
        with open(log_path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)


def _merge_gz_files(pattern, merged_log_path, remove_sources):
    """Decompress every archive matching *pattern* into *merged_log_path*."""
    with open(merged_log_path, 'wb') as merged_file:
        # Sorted so the merge order is deterministic (glob order is arbitrary).
        for gz_file in sorted(glob.glob(pattern)):
            with gzip.open(gz_file, 'rb') as f_in:
                shutil.copyfileobj(f_in, merged_file)
            if remove_sources:
                os.remove(gz_file)


def download_and_extract_logs(domain, start_time, output_dir, dir_time):
    """Download one day of CDN logs for *domain* and merge them three ways.

    Archives are stored directly under ``output_dir/domain``. Three merged
    files are then written there:

    * ``<domain>_all_<dir_time>.log`` from every ``*.gz`` (archives kept),
    * ``<domain>_cn_<dir_time>.log`` from ``*cn.gz`` (archives deleted),
    * ``<domain>_ov_<dir_time>.log`` from ``*ov.gz`` (archives deleted).

    Args:
        domain: Domain name assigned to ``ShowLogsRequest.domain_name``.
        start_time: Value assigned to ``ShowLogsRequest.start_time``; ``main``
            passes local midnight as a Unix timestamp in milliseconds.
        output_dir: Root directory for downloaded logs.
        dir_time: Date label embedded in the merged file names.

    Raises:
        requests.RequestException: If a log archive cannot be downloaded.
    """
    domain_dir = os.path.join(output_dir, domain)
    os.makedirs(domain_dir, exist_ok=True)

    # The client does not depend on the page, so build it once.
    client = _build_client()

    page_number = 1
    while True:
        request = ShowLogsRequest()
        request.domain_name = domain
        request.start_time = start_time
        request.page_size = 60
        request.page_number = page_number
        request.enterprise_project_id = "0"
        try:
            response = client.show_logs(request)
        except exceptions.ClientRequestException as e:
            print(e.status_code)
            print(e.request_id)
            print(e.error_code)
            print(e.error_msg)
            # Stop paging: there is no response to read, and retrying the
            # same page forever would never terminate.
            break

        logs = json.loads(str(response)).get('logs') or []
        if not logs:
            break

        for log in logs:
            log_url = log.get('link')
            if log_url:
                _download_log(log_url, domain_dir)
        page_number += 1

    # Decompress and merge all of the day's log files for this domain;
    # storage layout: domain/log file. The archives are kept here because the
    # two merges below still need them.
    merged_log_path = os.path.join(domain_dir, f'{domain}_all_{dir_time}.log')
    _merge_gz_files(os.path.join(domain_dir, '*.gz'), merged_log_path, False)

    # Decompress and merge the day's domestic-IP access files.
    merged_cn_log_path = os.path.join(domain_dir, f'{domain}_cn_{dir_time}.log')
    _merge_gz_files(os.path.join(domain_dir, '*cn.gz'), merged_cn_log_path, True)

    # Decompress and merge the day's overseas-IP access files.
    merged_ov_log_path = os.path.join(domain_dir, f'{domain}_ov_{dir_time}.log')
    _merge_gz_files(os.path.join(domain_dir, '*ov.gz'), merged_ov_log_path, True)


def main():
    """Fetch yesterday's logs for every configured domain."""
    domains = ["www.test.cn"]
    output_dir = 'cdn_logs'

    yesterday = datetime.now() - timedelta(days=1)
    dir_time = yesterday.strftime('%Y-%m-%d')
    # Naive local midnight of yesterday, converted to epoch milliseconds.
    midnight_yesterday = datetime(
        year=yesterday.year, month=yesterday.month, day=yesterday.day
    )
    start_time = int(midnight_yesterday.timestamp() * 1000)

    for domain in domains:
        download_and_extract_logs(domain, start_time, output_dir, dir_time)


if __name__ == "__main__":
    main()
4.循环获取前一个月内某段时间的日志
import glob
import gzip
import json
import os
import posixpath
import shutil
from datetime import datetime, timedelta
from urllib.parse import urlencode, urlparse

import requests
from huaweicloudsdkcdn.v2 import CdnClient, ShowLogsRequest
from huaweicloudsdkcdn.v2.region.cdn_region import CdnRegion
from huaweicloudsdkcore.auth.credentials import GlobalCredentials
from huaweicloudsdkcore.exceptions import exceptions


def _build_client():
    """Build the CDN client used for every ShowLogs page request."""
    # Placeholders: fill in the account's access key / secret key before running.
    ak = ''
    sk = ''
    credentials = GlobalCredentials(ak, sk)
    return (
        CdnClient.new_builder()
        .with_credentials(credentials)
        .with_region(CdnRegion.value_of("cn-north-1"))
        .build()
    )


def _download_log(log_url, target_dir):
    """Download one log archive into *target_dir*, named after the URL path."""
    # Take the last path component instead of a fixed split('/') index so the
    # name is still correct if the URL depth changes; the query string
    # (everything after '?') is dropped by urlparse.
    log_filename = posixpath.basename(urlparse(log_url).path)
    if not log_filename:
        return
    log_path = os.path.join(target_dir, log_filename)
    with requests.get(log_url, stream=True, timeout=60) as r:
        # Fail here rather than saving an HTTP error page as a ".gz" file
        # that would only blow up later during decompression.
        r.raise_for_status()
        with open(log_path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)


def _merge_gz_files(pattern, merged_log_path, remove_sources):
    """Decompress every archive matching *pattern* into *merged_log_path*."""
    with open(merged_log_path, 'wb') as merged_file:
        # Sorted so the merge order is deterministic (glob order is arbitrary).
        for gz_file in sorted(glob.glob(pattern)):
            with gzip.open(gz_file, 'rb') as f_in:
                shutil.copyfileobj(f_in, merged_file)
            if remove_sources:
                os.remove(gz_file)


def download_and_extract_logs(domain, start_time, output_dir, dir_time):
    """Download one day of CDN logs for *domain* and merge them three ways.

    Archives are stored directly under ``output_dir/domain``. Three merged
    files are then written there:

    * ``<domain>_all_<dir_time>.log`` from every ``*.gz`` (archives kept),
    * ``<domain>_cn_<dir_time>.log`` from ``*cn.gz`` (archives deleted),
    * ``<domain>_ov_<dir_time>.log`` from ``*ov.gz`` (archives deleted).

    Args:
        domain: Domain name assigned to ``ShowLogsRequest.domain_name``.
        start_time: Value assigned to ``ShowLogsRequest.start_time``; ``main``
            passes midnight of the target day as epoch milliseconds.
        output_dir: Root directory for downloaded logs.
        dir_time: Date label embedded in the merged file names.

    Raises:
        requests.RequestException: If a log archive cannot be downloaded.
    """
    domain_dir = os.path.join(output_dir, domain)
    os.makedirs(domain_dir, exist_ok=True)

    # The client does not depend on the page, so build it once.
    client = _build_client()

    page_number = 1
    while True:
        request = ShowLogsRequest()
        request.domain_name = domain
        request.start_time = start_time
        request.page_size = 60
        request.page_number = page_number
        request.enterprise_project_id = "0"
        try:
            response = client.show_logs(request)
        except exceptions.ClientRequestException as e:
            print(e.status_code)
            print(e.request_id)
            print(e.error_code)
            print(e.error_msg)
            # Stop paging: there is no response to read, and retrying the
            # same page forever would never terminate.
            break

        logs = json.loads(str(response)).get('logs') or []
        if not logs:
            break

        for log in logs:
            log_url = log.get('link')
            if log_url:
                _download_log(log_url, domain_dir)
        page_number += 1

    # Decompress and merge all of the day's log files for this domain;
    # storage layout: domain/log file. The archives are kept here because the
    # two merges below still need them.
    merged_all_log_path = os.path.join(domain_dir, f'{domain}_all_{dir_time}.log')
    _merge_gz_files(os.path.join(domain_dir, '*.gz'), merged_all_log_path, False)

    # Decompress and merge the day's domestic-IP access files.
    merged_cn_log_path = os.path.join(domain_dir, f'{domain}_cn_{dir_time}.log')
    _merge_gz_files(os.path.join(domain_dir, '*cn.gz'), merged_cn_log_path, True)

    # Decompress and merge the day's overseas-IP access files.
    merged_ov_log_path = os.path.join(domain_dir, f'{domain}_ov_{dir_time}.log')
    _merge_gz_files(os.path.join(domain_dir, '*ov.gz'), merged_ov_log_path, True)


def main():
    """Fetch logs for each configured domain over a fixed run of days."""
    domains = ["www.test.cn", "www.ceshi.com"]
    output_dir = 'cdn_logs'

    # First day to fetch.
    start_date = datetime(2024, 4, 8)
    # Number of consecutive days to fetch, starting at start_date.
    num_days = 10

    for i in range(num_days):
        current_date = start_date + timedelta(days=i)
        dir_time = current_date.strftime("%Y-%m-%d")
        # Naive local midnight of the day, converted to epoch milliseconds.
        start_time = int(current_date.timestamp() * 1000)
        for domain in domains:
            download_and_extract_logs(domain, start_time, output_dir, dir_time)


if __name__ == "__main__":
    main()
5.参考文档
https://support.huaweicloud.com/usermanual-cdn/zh-cn_topic_0073337424.html
https://console.huaweicloud.com/apiexplorer/#/openapi/CDN/sdk?api=ShowLogs&version=v2

标签:domain,log,python,CDN,time,path,日志,os,dir

From: https://www.cnblogs.com/sswind/p/18206195