【一】threading模块介绍
- 多线程创建和多进程创建很像
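- 我的理解是:Thread 和 Process 对外提供了同一套接口(start、join、run 等),符合鸭子类型的思想(实际上是 multiprocessing 模块参照 threading 模块的 API 设计的)
- 所以 threading 和 multiprocessing 两个模块的使用方法非常相似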
【二】开启线程的两种方式
方式一
- 直接实例化 Thread 类(Thread 是类,不是方法),通过 target 参数传入任务函数,再调用 start() 启动线程
from threading import Thread
import time
def task(name):
    """Simulate a two-second job, announcing when it starts and finishes.

    Args:
        name: Label interpolated into the start and finish messages.
    """
    started, finished = f'{name}任务开始', f'{name}任务结束'
    print(started)
    # The blocking sleep stands in for real work.
    time.sleep(2)
    print(finished)
if __name__ == '__main__':
    # Approach one: give Thread the callable and its positional arguments.
    worker = Thread(target=task, args=('学习',))
    worker.start()
    # start() does not wait for task() to finish, so the main thread
    # continues straight to this print.
    print('主线程')
方式二
- 继承Thread类
from threading import Thread
import time
class MyThread(Thread):
    """Thread subclass whose work is defined by overriding run()."""

    def __init__(self, name):
        """Initialise the base Thread, then store ``name`` as the thread name.

        Args:
            name: Label used as the thread name and in the printed messages.
        """
        # The base initialiser must run before the ``name`` property is set.
        super().__init__()
        self.name = name

    def run(self) -> None:
        """Print a start message, sleep two seconds, print a finish message.

        Invoked in the new thread by start(); not meant to be called directly.
        """
        print(f'{self.name}任务开始')
        time.sleep(2)
        print(f'{self.name}任务结束')
if __name__ == '__main__':
    # Approach two: instantiate the Thread subclass; start() runs its run()
    # method in a new thread.
    hoops_thread = MyThread(name='打篮球')
    hoops_thread.start()
    # The main thread does not wait for run() to finish before printing.
    print('主线程')
- 用法基本和multiprocessing模块,也就是创建进程的方式一模一样
【三】查看ID
- 用threading模块里面的current_thread()函数获取当前线程对象,再通过它的native_id属性(Python 3.8+)查看操作系统分配的线程ID
from threading import Thread,current_thread
import time
def task(name):
    """Announce the job, print the current thread's native ID, then finish.

    Args:
        name: Label interpolated into the start and finish messages.
    """
    print(f'{name}任务开始')
    # native_id is the thread ID assigned by the operating system.
    os_thread_id = current_thread().native_id
    print(os_thread_id)
    time.sleep(2)
    print(f'{name}任务结束')
if __name__ == '__main__':
    # Run task() in a separate thread so it reports that thread's ID.
    worker = Thread(target=task, args=('学习',))
    worker.start()
    # start() does not wait for task() to finish before this print runs.
    print('主线程')
- 用os模块里面的getpid()函数(注意:getpid()返回的是进程ID,同一进程内的所有线程得到的值相同)
from threading import Thread
import time
from os import getpid
def task(name):
    """Announce the job, print the ID of the current process, then finish.

    Args:
        name: Label interpolated into the start and finish messages.
    """
    print(f'{name}任务开始')
    # getpid() reports the process ID, not a per-thread ID.
    process_id = getpid()
    print(process_id)
    time.sleep(2)
    print(f'{name}任务结束')
if __name__ == '__main__':
    # Run task() in a separate thread; it prints the process ID from there.
    worker = Thread(target=task, args=('学习',))
    worker.start()
    # start() does not wait for task() to finish before this print runs.
    print('主线程')
【四】多线程并发的socket服务端
- 服务端
from threading import Thread
import socket
from socket import SOL_SOCKET, SO_REUSEADDR
# Listening TCP/IPv4 socket used by the accept loop in this script.
server = socket.socket(family=socket.AF_INET, type=socket.SOCK_STREAM)
# SO_REUSEADDR is enabled before bind() so the address can be rebound
# right after a restart.
server.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
server.bind(('127.0.0.1', 8080))
# Allow up to 5 pending connections in the accept backlog.
server.listen(5)
def talk(conn):
    """Echo each message from one client back in upper case until it leaves.

    Args:
        conn: Connected socket for a single client. It is always closed
            before this function returns.
    """
    try:
        while True:
            data = conn.recv(1024)
            # recv() returns b'' once the peer has closed the connection;
            # without this check the loop would spin forever on empty reads.
            if not data:
                break
            msg = data.decode('utf-8')
            print(f'客户端:>>>>{msg}')
            # sendall() keeps writing until the whole reply is sent, whereas
            # send() may transmit only part of it.
            conn.sendall(msg.upper().encode('utf-8'))
    except (OSError, UnicodeDecodeError) as error:
        # Connection errors (reset, broken pipe) or undecodable bytes end
        # this client's session; report the error and stop.
        print(error)
    finally:
        conn.close()
def threading_version(conn):
    """Serve one client connection in its own thread.

    Args:
        conn: Connected client socket, passed to talk() in the new thread.
    """
    Thread(target=talk, args=(conn,)).start()
def main_threading():
    """Accept clients forever, handing each connection to its own thread.

    The loop ends only when an exception (for example KeyboardInterrupt)
    propagates out of accept(); the listening socket is closed on the way
    out so the port is released.
    """
    try:
        while True:
            # The peer address is not needed by the handler.
            conn, _addr = server.accept()
            threading_version(conn=conn)
    finally:
        server.close()
# Start accepting clients only when this file is executed as a script.
if __name__ == '__main__':
    main_threading()
- 客户端(不变)
from socket import *
# With no arguments, socket() creates a TCP (AF_INET / SOCK_STREAM) socket.
# (1) Create the client socket. The with-block closes it on every exit path,
#     including errors and Ctrl-C.
with socket() as client:
    # (2) Connect to the server's IP and port.
    IP = '127.0.0.1'
    PORT = 8080
    client.connect((IP, PORT))
    # (3) Communication loop.
    while True:
        # (3.1) Send a message to the server.
        msg_to_server = input('请输入消息:>>>>').strip()
        # Empty input is skipped: nothing would be sent, so the recv() below
        # would wait for a reply that never comes.
        if not msg_to_server:
            continue
        client.sendall(msg_to_server.encode('utf-8'))
        # (3.2) Receive the server's reply.
        data_from_server = client.recv(1024)
        # An empty read means the server closed the connection.
        if not data_from_server:
            break
        print(data_from_server.decode('utf-8'))
        # 'q' is still sent and echoed before the client quits.
        if msg_to_server == 'q':
            break
【五】单进程,多进程,多线程速度比较
- 通过一个爬虫案例
import requests
import os
import time
from lxml import etree
from multiprocessing import Process
from threading import Thread
def timer(func):
    """Decorate ``func`` so each call prints its elapsed time in seconds.

    Args:
        func: Callable to time.

    Returns:
        A wrapper that forwards all arguments to ``func``, prints the elapsed
        time, and returns whatever ``func`` returned. The wrapper keeps the
        name and docstring of ``func``.
    """
    # Imported locally so this block needs no change to the file's imports.
    from functools import wraps

    @wraps(func)
    def inner(*args, **kwargs):
        # perf_counter() is a monotonic clock intended for measuring intervals.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        print(f'总耗时{time.perf_counter() - start_time}秒')
        # Pass the wrapped function's result through instead of dropping it.
        return result

    return inner
def create_url_list(page_count=5):
    """Build the listing-page URLs for pages 1 through ``page_count``.

    Args:
        page_count: Number of listing pages to generate. Defaults to 5, the
            original hard-coded value. Zero or a negative value gives an
            empty list.

    Returns:
        A list of URL strings. Page 1 is the bare category URL; later pages
        use the ``index_<n>.html`` form.
    """
    base_url = 'https://pic.netbian.com/4kmeinv/'
    return [
        base_url if page == 1 else f'{base_url}index_{page}.html'
        for page in range(1, page_count + 1)
    ]
def spider_data(url_list, timeout=10):
    """Fetch each listing page and collect image URLs with their titles.

    Args:
        url_list: Iterable of listing-page URLs.
        timeout: Seconds to wait for each HTTP request. Without a timeout a
            stalled server would block the caller indefinitely.

    Returns:
        A list of dicts of the form ``{'src': <image URL>, 'title': <title>}``.
        Entries lacking either an image source or a title are skipped.
    """
    records = []
    for url in url_list:
        response = requests.get(url=url, timeout=timeout)
        # The response is decoded as GBK before parsing.
        response.encoding = 'gbk'
        html_obj = etree.HTML(response.text)
        # Skip pages where the parser produced no document.
        if html_obj is None:
            continue
        for li in html_obj.xpath('//*[@id="main"]/div[3]/ul/li'):
            src_nodes = li.xpath('./a/img/@src')
            title_nodes = li.xpath('./a/b/text()')
            # Indexing [0] on an empty result would raise IndexError; skip
            # list items that do not have both pieces.
            if not src_nodes or not title_nodes:
                continue
            records.append({
                'src': "https://pic.netbian.com/" + src_nodes[0],
                'title': title_nodes[0],
            })
    return records
def save_data(file_title, file_src, timeout=10):
    """Download one image and write it to ``image/<title>.png`` beside this file.

    Args:
        file_title: Image title, used as the file name (without extension).
        file_src: URL the image bytes are downloaded from.
        timeout: Seconds to wait for the HTTP request before giving up.
    """
    image_dir = os.path.join(os.path.dirname(__file__), 'image')
    os.makedirs(image_dir, exist_ok=True)
    # Path separators and other characters that are illegal in file names
    # would make open() fail, so they are replaced with underscores.
    safe_title = ''.join(
        '_' if ch in '\\/:*?"<>|' else ch for ch in file_title
    )
    file_path = os.path.join(image_dir, f'{safe_title}.png')
    data = requests.get(file_src, timeout=timeout)
    with open(file_path, 'wb') as fp:
        fp.write(data.content)
    print(f'当前图片{file_title}保存完成')
@timer
def main_normal():
    """Baseline: download every scraped image sequentially in the caller."""
    pages = create_url_list()
    for item in spider_data(url_list=pages):
        save_data(item.get('title'), item.get('src'))
@timer
def main_process():
    """Download every scraped image using one child process per image."""
    pages = create_url_list()
    workers = []
    for item in spider_data(url_list=pages):
        worker = Process(
            target=save_data,
            args=(item.get('title'), item.get('src')),
        )
        worker.start()
        workers.append(worker)
    # Wait for all downloads so the timer covers the whole batch.
    for worker in workers:
        worker.join()
@timer
def main_thread():
    """Download every scraped image using one thread per image."""
    pages = create_url_list()
    workers = []
    for item in spider_data(url_list=pages):
        worker = Thread(
            target=save_data,
            args=(item.get('title'), item.get('src')),
        )
        worker.start()
        workers.append(worker)
    # Wait for all downloads so the timer covers the whole batch.
    for worker in workers:
        worker.join()
if __name__ == '__main__':
    # Placeholder body: uncomment one of the calls below to run that variant.
    ...
    # The figure after each call is presumably the elapsed time the author
    # observed for that variant.
    # main_normal() 34s
    # main_process() 9.7s
    # main_thread() 7s
- 可以看到,在IO密集型(存在大量IO阻塞)的任务中,速度比较结果为
- 多线程 > 多进程 > 单进程(">"表示更快;线程的创建开销比进程小,因此多线程略快于多进程)