import csv
import os
import time

import requests

def get_domain_update_time(domain, token=None):
    """
    Look up when a domain was last updated in the maltrail repository on GitHub.

    Args:
        domain: The domain to search for.
        token: Optional GitHub personal access token. The code search API
            rejects unauthenticated requests, so a token is effectively required.

    Returns:
        The committer date (UTC, ISO 8601) of the latest commit touching the
        file that contains the domain, "not found" if the search has no hits,
        or None on request failure.
    """
    headers = {"Accept": "application/vnd.github+json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"

    url = f"https://api.github.com/search/code?q={domain}+in:file+repo:stamparm/maltrail"
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Request failed:", response.status_code)
        return None

    data = response.json()
    if data["total_count"] == 0:
        return "not found"

    # The "sha" on a code search item is the file's blob SHA, not a commit SHA,
    # so it cannot be passed to the commits endpoint. Instead, ask the commits
    # API for the most recent commit that touched the matched file's path.
    path = data["items"][0]["path"]
    url = f"https://api.github.com/repos/stamparm/maltrail/commits?path={path}&per_page=1"
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        commits = response.json()
        if commits:
            return commits[0]["commit"]["committer"]["date"]
    return None
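
# Usage sketch (hypothetical domain and timestamp): commit dates returned by
# the GitHub API are ISO 8601 UTC strings, so a successful lookup looks
# roughly like this:
#
#   >>> get_domain_update_time("some-malicious-domain.com", token="...")
#   '2023-05-17T09:41:02Z'
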
def get_domain_list(file_path):
    """
    Extract all domains from a maltrail trails file.

    Trails files are plain text, one entry per line, with full-line and inline
    comments introduced by "#" (they are not HTML, so no HTML parser is needed).

    Args:
        file_path: Path to the trails file.

    Returns:
        List of domains.
    """
    domains = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            # Drop any inline comment, then surrounding whitespace.
            entry = line.split("#", 1)[0].strip()
            if entry:
                domains.append(entry)
    return domains
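
# For reference, a trails file looks roughly like this (illustrative sample,
# not real maltrail content):
#
#   # Reference: https://example.com/threat-report
#   some-malicious-domain.com
#   another-bad-domain.net  # inline reference
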
def main(file_path):
    """
    Main entry point.
    """
    # A token is effectively required: the code search API rejects
    # unauthenticated requests, and even authenticated search is limited to
    # roughly 30 requests per minute.
    token = os.environ.get("GITHUB_TOKEN")

    # Collect the domains from the trails file.
    domains = get_domain_list(file_path)

    # Look up the last update time for each domain, pausing between requests
    # to stay under the search API rate limit.
    domain_update_times = {}
    for domain in domains:
        domain_update_times[domain] = get_domain_update_time(domain, token)
        time.sleep(2)

    # Write the results to a CSV file.
    with open("domain_update_times.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["domain", "update_time"])
        for domain, update_time in domain_update_times.items():
            writer.writerow([domain, update_time])

if __name__ == "__main__":
    # Replace with the path of a file under the trails directory of the
    # maltrail project.
    file_path = "path/to/maltrail/file"
    main(file_path)
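
# Usage sketch (assumes a GitHub personal access token in the GITHUB_TOKEN
# environment variable, since the code search API rejects unauthenticated
# requests):
#
#   export GITHUB_TOKEN=...       # personal access token
#   python check_domains.py       # whatever this file is saved as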