清洗日期格式
import re
from datetime import datetime
# Matches timestamps such as "19/Oct/2023:12:34:56 +0800".
# NOTE: only "+" UTC offsets match; a "-0500" style offset is left untouched.
pattern = r'(\d{2}/[A-Za-z]{3}/\d{4}:\d{2}:\d{2}:\d{2} \+\d{4})'


def clean_dates(text):
    """Rewrite every matched timestamp in *text* as ``YYYY/MM/DD HH:MM``.

    The wall-clock time is kept as written; the seconds and the UTC offset
    are dropped, no time-zone conversion is performed.

    Args:
        text: The text to scan.

    Returns:
        A copy of *text* with each parseable timestamp reformatted. A
        substring that matches ``pattern`` but is not a real date (for
        example an unknown month abbreviation) is returned unchanged.
    """

    def _reformat(found):
        raw = found.group(0)
        try:
            # %b depends on the active locale for month abbreviations.
            parsed = datetime.strptime(raw, '%d/%b/%Y:%H:%M:%S %z')
        except ValueError:
            # One malformed timestamp should not abort the whole clean-up.
            return raw
        return parsed.strftime('%Y/%m/%d %H:%M')

    # A single pass rewrites every occurrence, instead of rescanning the
    # whole text once per match.
    return re.sub(pattern, _reformat, text)


if __name__ == '__main__':
    # Read the raw text.
    with open('result.txt', 'r') as file:
        data = file.read()

    data = clean_dates(data)

    # Write the cleaned text to a separate file.
    with open('out.txt', 'w') as file:
        file.write(data)
清洗IPv4格式(将IPv4地址替换为城市名)
import re
import requests
import concurrent.futures
# Regular expression matching dotted-quad IPv4 addresses.
# Each octet is limited to 0-255 (leading zeros allowed), so strings such as
# "999.1.1.1" are not treated as addresses. No capturing groups are used, so
# findall() returns the full address strings.
ipv4_pattern = re.compile(
    r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])\.){3}'
    r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])\b'
)
# Query the ip-api.com JSON endpoint and return the city for an address.
def get_city_info(ip, timeout=5):
    """Return the city reported for *ip*, or *ip* itself if none is available.

    Args:
        ip: The IPv4 address, as a string, to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``city`` field of the response when its ``status`` is
        ``'success'`` and the city is non-empty; otherwise *ip* unchanged.
        Network errors, timeouts and non-JSON replies also yield *ip*, so
        one failed lookup cannot abort a batch of lookups.
    """
    try:
        # Without an explicit timeout a stalled connection would block forever.
        response = requests.get(f'http://ip-api.com/json/{ip}', timeout=timeout)
        data = response.json()
    except (requests.RequestException, ValueError):
        return ip

    if not isinstance(data, dict) or data.get('status') != 'success':
        return ip
    # An empty or missing city would otherwise erase the address.
    return data.get('city') or ip
def replace_ips(text, pattern, lookup, max_workers=5):
    """Replace every address matched by *pattern* in *text* via *lookup*.

    Args:
        text: The text to scan.
        pattern: A compiled regular expression matching the addresses.
        lookup: Callable taking one address string and returning its
            replacement string. It is called from worker threads.
        max_workers: Size of the thread pool running the lookups.

    Returns:
        A copy of *text* in which each match is replaced by the result of
        ``lookup`` for that exact match.
    """
    # De-duplicate while keeping first-seen order: one lookup per address.
    unique_ips = list(dict.fromkeys(m.group(0) for m in pattern.finditer(text)))
    if not unique_ips:
        return text

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        replacements = dict(zip(unique_ips, executor.map(lookup, unique_ips)))

    # Substitute in one pass using the same pattern that found the addresses.
    # Plain str.replace would also rewrite an address embedded in a longer
    # one (e.g. "10.0.0.1" inside "10.0.0.10").
    return pattern.sub(lambda m: replacements[m.group(0)], text)


if __name__ == '__main__':
    # Read the file content.
    with open('out.txt', 'r') as file:
        content = file.read()

    content = replace_ips(content, ipv4_pattern, get_city_info)

    # Write the result. UTF-8 is used because the replacement names come from
    # an external service and may not fit the platform default encoding.
    with open('output.txt', 'w', encoding='utf-8') as file:
        file.write(content)
标签:city,python,ip,content,re,ipv4,file,格式,data
From: https://www.cnblogs.com/yzx-sir/p/17774435.html