urllib.request.urlopen()源代码——urlopen()在干什么
返回opener.open(url, data, timeout)方法的结果
# Module-level cache for the default opener; starts as None and is filled in
# by the first urlopen() call that needs no custom SSL context.
_opener = None


def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    """Open *url* and return the result of ``opener.open(url, data, timeout)``.

    The opener that performs the request is chosen as follows:

    * If any of *cafile*, *capath* or *cadefault* is truthy, a deprecation
      warning is issued, a default SSL context is created from
      *cafile*/*capath*, and a one-off opener with an ``HTTPSHandler`` using
      that context is built.
    * Otherwise, if *context* is truthy, a one-off opener with an
      ``HTTPSHandler`` using *context* is built.
    * Otherwise the module-level ``_opener`` is used; it is created with
      ``build_opener()`` on first use and reused afterwards.

    Raises:
        ValueError: if *context* is passed together with any of *cafile*,
            *capath* or *cadefault*, or if those legacy arguments are used
            while SSL support is not available.
    """
    global _opener
    if cafile or capath or cadefault:
        import warnings
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        # send ALPN extension to indicate HTTP/1.1 protocol
        context.set_alpn_protocols(['http/1.1'])
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif context:
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif _opener is None:
        # Default case: build the shared opener once and cache it.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
urllib.request.build_opener()
build_opener()函数返回的是OpenerDirector类的实例
def build_opener(*handlers):
    """Create and return an ``OpenerDirector`` loaded with handlers.

    A fixed list of default handler classes is instantiated and added to the
    opener first; ``HTTPSHandler`` is part of that list only when
    ``http.client`` provides ``HTTPSConnection``. A default class is left out
    when one of *handlers* is an instance of it, or is a class that
    subclasses it, so a caller-supplied handler takes the place of the
    matching default.

    Args:
        *handlers: handler instances or handler classes. Classes are
            instantiated with no arguments before being added.

    Returns:
        The configured ``OpenerDirector`` instance.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)
    # Collect the default classes that are overridden by a supplied handler.
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if isinstance(check, type):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)
    for klass in skip:
        default_classes.remove(klass)

    # Remaining defaults are added first, then the caller's handlers.
    for klass in default_classes:
        opener.add_handler(klass())

    for h in handlers:
        if isinstance(h, type):
            h = h()
        opener.add_handler(h)
    return opener
urllib.request.OpenerDirector().open()
由 class OpenerDirector 的定义可知,OpenerDirector 是一个类。由上面的代码 opener = build_opener() 可知,opener 是 OpenerDirector 类的一个实例,即根据 OpenerDirector 类创建的对象(build_opener() 返回新创建的实例,该实例被赋值给 opener)。
所以urlopen返回的结果是opener.open()方法的结果——响应(response),而opener.open()是一个更底层的方法,它允许自定义opener对象发送特定的请求,获取响应结果。
自定义opener对象发送请求(添加网络代理Proxy)
在build_opener()函数的参数中添加一个或多个处理程序(handlers)
from urllib.request import Request, ProxyHandler, build_opener

# Target address.
url = 'http://httpbin.org/get'
# Build the request object.
req = Request(url)
# Build a handler that routes requests through a proxy.
# To name a proxy explicitly, pass a mapping of scheme to proxy address:
#     ProxyHandler({'type': 'ip:port'})
# Called with no argument, ProxyHandler takes the proxy settings from the
# environment instead (see the urllib.request documentation).
handler = ProxyHandler()
# Build an opener that uses the handler.
opener = build_opener(handler)
# Send the request; the with-block closes the response once it has been read.
with opener.open(req) as resp:
    # Print the decoded response body.
    print(resp.read().decode())
使用OpenerDirector类的add_handler()方法
爬虫设置代理就是让别的服务器代替自己的服务器去获取数据。
代理分类
代理网站
小象代理
快代理
云代理
66ip代理
站大爷
开心代理
讯代理
标签:自定义,handler,IP,urllib,opener,build,context,OpenerDirector,klass
From: https://www.cnblogs.com/qyly/p/18416308