import os
import requests
import requests
from bs4 import BeautifulSoup
from typing import Dict
# Module-level index filled by the_big_div(): maps the text of each block's
# <a class="sortid"> link to the <div class="code-content content"> element
# that contains it.
# NOTE(review): the values come from soup.find_all(), so they are presumably
# bs4 Tag objects rather than BeautifulSoup instances -- confirm the annotation.
total_div: Dict[str, BeautifulSoup] = dict()
def the_big_div(text: str):
    """Index the content blocks of one listing page into ``total_div``.

    Every ``<div class="code-content content">`` in ``text`` that contains an
    ``<a class="sortid">`` link is stored in the module-level ``total_div``
    dict under that link's text. A block whose key is already present
    replaces the earlier entry.

    Args:
        text: HTML source of a listing page.
    """
    soup = BeautifulSoup(text, 'lxml')
    for block in soup.find_all('div', class_='code-content content'):
        category_link = block.find('a', class_='sortid')
        # Blocks without a category link cannot be keyed, so they are skipped.
        if category_link:
            total_div[category_link.text] = block
def download_file(url, local_file_path, timeout=30):
    """Download ``url`` and write the response body to ``local_file_path``.

    Args:
        url: Address of the file to fetch.
        local_file_path: Destination path. Missing parent directories are
            created.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On a connection problem, a timeout, or an
            HTTP error status.
        OSError: If the directory or the file cannot be written.
    """
    # Fetch first so that a failed request never leaves an empty file behind,
    # and refuse to save an HTTP error page as if it were the payload.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Ensure the directory exists
    parent_dir = os.path.dirname(local_file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(local_file_path, 'wb') as f:
        f.write(response.content)


# Example usage:
# download_file('https://example.com/file.torrent', '/torrent/file.torrent')
def parser_the_sub_html(url: str, timeout: float = 30) -> str:
    """Fetch a detail page and return the href of its ``a.xfcomment`` link.

    Args:
        url: Address of the detail page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``href`` attribute of the first ``<a class="xfcomment">`` element.

    Raises:
        requests.RequestException: On a connection problem, a timeout, or an
            HTTP error status.
        ValueError: If the page has no ``a.xfcomment`` link with an ``href``.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    link = soup.find('a', class_='xfcomment')
    href = link.get('href') if link is not None else None
    if not href:
        # find() returns None when nothing matches; fail with a message that
        # names the page instead of an AttributeError on None.
        raise ValueError(f'no a.xfcomment link with an href found on {url}')
    return href
# Translation table for clean_the_file_name(): the space and both path
# separators become "_"; the remaining characters Windows rejects in file
# names, plus line breaks and tabs, are removed.
_FILE_NAME_TABLE = str.maketrans(' /\\', '___', ':?*"<>|\n\r\t')


def clean_the_file_name(name: str) -> str:
    """Turn arbitrary text into a string usable as a file name.

    Spaces, ``/`` and ``\\`` are replaced by ``_``. The characters
    ``: ? * " < > |`` as well as newlines, carriage returns and tabs are
    dropped. The result is then cut to at most 100 characters.

    Args:
        name: Raw text, for example a title scraped from a page.

    Returns:
        The cleaned name, at most 100 characters long.
    """
    # Slicing is a no-op for names that are already short enough.
    return name.translate(_FILE_NAME_TABLE)[:100]
if __name__ == '__main__':
    url = 'https://share.xfsub.org'
    # The downloads are written below 'torrent/'; create it up front so the
    # first write does not fail on a fresh checkout.
    os.makedirs('torrent', exist_ok=True)

    # NOTE(review): the original indentation was lost. The download step is
    # placed inside the page loop because the_big_div() keeps only one block
    # per category, so running it after the loop would process a single page.
    for i in range(101, 202):
        sub_urls = f'https://share.xfsub.org/sort-1-{i}.html'
        # Forget the previous page's blocks; otherwise a page without the
        # wanted category would re-process the stale block from the page before.
        total_div.clear()
        the_big_div(requests.get(sub_urls, timeout=30).text)
        print(total_div.keys())

        category = total_div.get('动画')
        if category is None:
            # Nothing to download from this page.
            continue

        # the address of the subitem page
        all_subitem = category.find_all('a', class_='name-text')
        # iterate over the subitems
        for sub_item in all_subitem:
            download_file(
                parser_the_sub_html(url + sub_item.get('href')),
                # Append the extension after cleaning: clean_the_file_name()
                # truncates long names, which would otherwise cut off
                # '.torrent'.
                'torrent/' + clean_the_file_name(sub_item.text) + '.torrent',
            )
            print(sub_item.text.strip(), '\t' * 5, '________________ -> done')
import os
import re
from collections import Counter
import matplotlib.pyplot as plt
import pandas as pd
def get_file_list(path: str) -> list:
    """Return the names of the entries in the directory ``path``.

    Args:
        path: Directory to list.

    Returns:
        The entry names as returned by ``os.listdir``: bare names without
        the directory prefix, in no particular order.

    Raises:
        OSError: If ``path`` does not exist or cannot be read.
    """
    return os.listdir(path)
if __name__ == '__main__':
    # SimHei is a CJK font; presumably selected so that the Chinese labels
    # and title below render instead of showing placeholder boxes.
    plt.rcParams['font.sans-serif'] = ['SimHei']

    # Directory holding the downloaded files (machine-specific absolute path).
    torrent_dir = r'C:\Users\123\Desktop\Project_Try\demo\src\test\java\scripts\torrent'
    # Captures the text of a leading [...] or 【...】 tag at the start of a
    # file name. Compiled once, outside the loop.
    prefix_pattern = re.compile(r'[\[\【](.*?)[\]\】]')

    total_list = []
    for item in get_file_list(torrent_dir):
        group_ = prefix_pattern.match(item)
        # File names without a leading bracketed tag are not counted.
        if group_:
            total_list.append(group_.group(1))

    # Count the files per tag and plot the tags from most to least frequent.
    count = Counter(total_list)
    pd1 = pd.DataFrame(count.most_common(), columns=['资源', '数量'])
    plt.bar(pd1['资源'].to_list(), pd1['数量'].to_list())
    plt.title('资源统计')
    plt.show()
# Tags: __, Handle, name, Python, Completed, list, item, file, import
# From: https://www.cnblogs.com/qiantaosama/p/18264146