import json
import os
from concurrent.futures import ThreadPoolExecutor  # thread pool used for the parallel downloads

import requests
from lxml import etree
def mians(num):
    """Scrape one listing page and download every file it links to.

    Requests the listing page for ``offset=num``, reads the link targets
    and link texts of all ``//th/a`` elements, pairs them up positionally
    and submits one ``dow(url, name)`` call per pair to a thread pool.

    Args:
        num: Value placed in the ``offset`` query parameter of the listing URL.

    Raises:
        requests.RequestException: If the listing page cannot be fetched,
            times out, or the server answers with an HTTP error status.
    """
    domain = "http://www.1o1o.xyz/"
    url = f"http://www.1o1o.xyz/ctfarticle.asp?offset={num}"
    max_items = 30  # upper bound on links handled per listing page

    # A timeout keeps one stalled connection from hanging the whole crawl.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    res.encoding = "gb2312"

    et = etree.HTML(res.text)
    # The trailing anchors are dropped before pairing -- presumably they are
    # navigation links rather than documents; verify against the page markup.
    pdf_url = et.xpath("//th/a/@href")[0:-1]
    pdf_name = et.xpath("//th/a/text()")[0:-2]
    print(pdf_url)
    print(pdf_name)

    # zip() stops at the shorter list, so a page with fewer than
    # ``max_items`` entries no longer raises IndexError.
    pairs = list(zip(pdf_url, pdf_name))[:max_items]

    # Leaving the ``with`` block waits for every submitted download to finish.
    with ThreadPoolExecutor(30) as t:
        futures = [
            (name, t.submit(dow, domain + href, name))
            for href, name in pairs
        ]

    # submit() keeps worker exceptions inside the future; report them here
    # so a failed download is visible instead of silently lost.
    for name, future in futures:
        error = future.exception()
        if error is not None:
            print(f"download failed for {name!r}: {error!r}")

    print("下载完成")
def dow(imaurl, name, out_dir=r"E:\WP"):
    """Download ``imaurl`` and write the response body to a file in ``out_dir``.

    Args:
        imaurl: URL of the file to download.
        name: File name to save under. Surrounding whitespace is stripped,
            and control characters plus the characters Windows forbids in
            file names are replaced with ``_``.
        out_dir: Destination directory; created when it does not exist.
            Defaults to the directory this script has always written to.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (so an error page is never saved as the file).
        OSError: If the directory or file cannot be created or written.
    """
    # Characters that cannot appear in a Windows file name, plus ASCII
    # control characters, all mapped to an underscore.
    replacements = {ord(ch): "_" for ch in '\\/:*?"<>|'}
    replacements.update({code: "_" for code in range(32)})
    safe_name = name.strip().translate(replacements)

    data = requests.get(imaurl, timeout=60)
    data.raise_for_status()

    # exist_ok=True makes this safe when several worker threads get here
    # at the same time.
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, safe_name), "wb") as f:
        f.write(data.content)
if __name__ == "__main__":
    # Walk the listing one page at a time: offsets 0, 30, 60, ..., 12450.
    page_offsets = range(0, 12451, 30)
    for offset in page_offsets:
        print(offset)  # progress marker: the offset about to be scraped
        mians(offset)