Settings概述
文件路径:scrapy/settings/__init__.py、scrapy/settings/default_settings.py
scrapy/settings这个文件夹中有两个文件。在__init__.py文件中定义了scrapy中的设置对象的核心类。default_settings.py文件中定义了scrapy所有的默认配置,这个就是记录scrapy默认配置的文件。
settings/__init__.py 文件详解
该文件可以大致分为五部分,我们分为5段代码解析
第一块:导入库与类型检查代码
from __future__ import annotations #这个特殊导入语句的作用是改变 Python 解释器对类型提示的处理方式,将类型提示中的类型表达式保留为字符串,直到需要时才进行解析,从而避免可能导致的一些潜在问题。
import copy
import json
from collections.abc import Iterable, Iterator, Mapping, MutableMapping
from importlib import import_module
from pprint import pformat
from typing import TYPE_CHECKING, Any, Union, cast
from scrapy.settings import default_settings
# The key types are restricted in BaseSettings._get_key() to ones supported by JSON,
# see https://github.com/scrapy/scrapy/issues/5383.
_SettingsKeyT = Union[bool, float, int, str, None] # 联合类型; Union[X, Y] 等价于 X | Y ,意味着满足 X 或 Y 之一。
"""
# 被第三方静态类型检查器假定为 True 的特殊常量。 正常运行的时候 False。
https://docs.python.org/zh-cn/3.9/library/typing.html?highlight=type_checking#typing.TYPE_CHECKING
"""
if TYPE_CHECKING:
from types import ModuleType
# https://github.com/python/typing/issues/445#issuecomment-1131458824
from _typeshed import SupportsItems
# typing.Self requires Python 3.11
from typing_extensions import Self
_SettingsInputT = Union[SupportsItems[_SettingsKeyT, Any], str, None]
需要特别注意的是:
annotations 是用来提示解释器该如何处理类型提示的。基本上每个 scrapy 源文件的第一句都是 from __future__ import annotations 这条导入语句(它是一条 future 语句,而不是函数)。
_SettingsKeyT:是一个联合类型。用来指定settings对象的键的类型的。
TYPE_CHECKING:用来区分静态类型检查与正常运行两种情况。静态类型检查器会把它当作 True,正常运行时它是 False。
第二部分
这个部分比较直白。就是定义一个容器,然后配套一个方法,去这个容器里取值。
"""
SETTINGS_PRIORITIES中记录了不同级别的分数。
get_settings_priority方法就是用来通过级别来取分数的,参数传入,如果是级别,就返回对应分数。如果是分数,正好就直接返回了
"""
# Named priority levels and the numeric score each one stands for.
SETTINGS_PRIORITIES: dict[str, int] = dict(
    default=0,
    command=10,
    addon=15,
    project=20,
    spider=30,
    cmdline=40,
)


def get_settings_priority(priority: int | str) -> int:
    """
    Resolve a priority given either by name or by number.

    A string is looked up in :attr:`~scrapy.settings.SETTINGS_PRIORITIES`
    (an unknown name raises ``KeyError``); any other value is handed back
    unchanged.
    """
    return SETTINGS_PRIORITIES[priority] if isinstance(priority, str) else priority
这里定义了一个SETTINGS_PRIORITIES字典对象,用来存储不同级别所对应的分数。scrapy 用这些分数来实现不同来源的设置之间的优先级。
另外get_settings_priority方法,就是根据级别到SETTINGS_PRIORITIES中取出对应的分数。
第三部分
这里定义SettingsAttribute类。scrapy中的设置中的值就是用这个类生成的实例。
class SettingsAttribute:
    """Pair the value of a single setting with the priority it was stored at.

    Every setting kept by a settings object is an instance of this class.
    It is meant for internal use; configure settings through the Settings
    class rather than through this one.
    """

    def __init__(self, value: Any, priority: int):
        # ``value`` is the setting's value, ``priority`` its numeric score.
        self.value: Any = value
        # A nested BaseSettings has per-key priorities of its own, so the
        # attribute is ranked at least as high as the highest of them.
        nested = isinstance(self.value, BaseSettings)
        self.priority: int = (
            max(self.value.maxpriority(), priority) if nested else priority
        )

    def set(self, value: Any, priority: int) -> None:
        """Store ``value`` unless ``priority`` is lower than the current one."""
        if priority < self.priority:
            return
        # When the current value is a BaseSettings, the replacement is
        # wrapped in a new BaseSettings built at the incoming priority.
        if isinstance(self.value, BaseSettings):
            value = BaseSettings(value, priority=priority)
        self.value = value
        self.priority = priority

    def __repr__(self) -> str:
        # Formal, debugging-oriented representation of the attribute.
        return f"<SettingsAttribute value={self.value!r} priority={self.priority}>"
第四部分 核心类 BaseSettings 与 Settings
scrapy中的settings是一个类似于字典的结构,但是还有冻结、优先级的功能。
BaseSettings是Settings的父类,可以把它看作python内置字典类型的兄弟类:BaseSettings继承自MutableMapping,而内置的dict被注册为MutableMapping的虚拟子类,两者提供同一套映射接口。
"""
MutableMapping是python提供的用于实现映射 mappings 的抽象基类
我们的目的是想创建一个类似于字典的键值对结构,所以继承自MutableMapping
MutableMapping 的键的类型通过_SettingsKeyT指定,值为any,任意类型都可以。
MutableMapping的文档
https://docs.python.org/zh-cn/3.9/library/collections.abc.html?highlight=mutablemapping#collections.abc.MutableMapping
https://docs.python.org/zh-cn/3.9/glossary.html#term-mapping
"""
class BaseSettings(MutableMapping[_SettingsKeyT, Any]):
__default = object()
def __init__(self, values: _SettingsInputT = None, priority: int | str = "project"):
self.frozen: bool = False
self.attributes: dict[_SettingsKeyT, SettingsAttribute] = {} # 就是用来容纳所有设置属性的,是个字典
if values:
self.update(values, priority)
"""
https://docs.python.org/zh-cn/3.9/library/collections.abc.html#collections-abstract-base-classes
因为是继承自MutableMapping,所以子类要实现的抽象方法有__getitem__, __setitem__, __delitem__, __iter__, __len__
另外还有一些方法:pop, popitem, clear, update,和 setdefault
其实内置的dict也被注册为MutableMapping的(虚拟)子类,可以把BaseSettings理解成dict的兄弟类。
"""
def __getitem__(self, opt_name: _SettingsKeyT) -> Any: # 在b = aa['name']的时候其实是在调用这个函数
if opt_name not in self:
return None
return self.attributes[opt_name].value
def __setitem__(self, name: _SettingsKeyT, value: Any) -> None:
self.set(name, value)
def __delitem__(self, name: _SettingsKeyT) -> None:
self._assert_mutability()
del self.attributes[name]
def __iter__(self) -> Iterator[_SettingsKeyT]:
return iter(self.attributes)
def __len__(self) -> int:
return len(self.attributes)
"""
__contains__ 在祖父类 Mapping 中已经有默认的(mixin)实现,这里把它重写为直接查询 self.attributes
"""
def __contains__(self, name: Any) -> bool:
return name in self.attributes
"""
获取设置中的值:基础方法是get,在它之上衍生出6个方法——getbool、getint、getfloat、getlist、getdict、getdictorlist,分别把取到的值转换成对应的类型。
另外还拓展出getwithbase,用来获取并合并_BASE中的设置
"""
def get(self, name: _SettingsKeyT, default: Any = None) -> Any:
    """
    Return a setting value as stored, without converting its type.

    :param name: the setting name
    :type name: str

    :param default: returned when the lookup yields ``None``, i.e. when the
        setting is missing or is itself ``None``
    :type default: object
    """
    found = self[name]
    return default if found is None else found
def getbool(self, name: _SettingsKeyT, default: bool = False) -> bool:
    """
    Return a setting value as a boolean.

    The value is first run through ``int()``, so ``1``, ``'1'`` and ``True``
    give ``True`` while ``0``, ``'0'`` and ``False`` give ``False``. When
    ``int()`` rejects the value, the strings ``'True'``/``'true'`` and
    ``'False'``/``'false'`` are accepted; anything else raises
    ``ValueError``. A setting that is missing or ``None`` falls back to
    ``default``.

    :param name: the setting name
    :type name: str

    :param default: the value to use if no setting is found
    :type default: object
    """
    raw = self.get(name, default)
    try:
        return bool(int(raw))
    except ValueError:
        spellings = (
            (True, ("True", "true")),
            (False, ("False", "false")),
        )
        for flag, accepted in spellings:
            if raw in accepted:
                return flag
        raise ValueError(
            "Supported values for boolean settings "
            "are 0/1, True/False, '0'/'1', "
            "'True'/'False' and 'true'/'false'"
        )
def getint(self, name: _SettingsKeyT, default: int = 0) -> int:
    """
    Return a setting value converted with ``int()``.

    :param name: the setting name
    :type name: str

    :param default: the value to convert if no setting is found
    :type default: object
    """
    raw = self.get(name, default)
    return int(raw)
def getfloat(self, name: _SettingsKeyT, default: float = 0.0) -> float:
    """
    Return a setting value converted with ``float()``.

    :param name: the setting name
    :type name: str

    :param default: the value to convert if no setting is found
    :type default: object
    """
    raw = self.get(name, default)
    return float(raw)
def getlist(
    self, name: _SettingsKeyT, default: list[Any] | None = None
) -> list[Any]:
    """
    Return a setting value as a new list.

    A string is split on ``","`` (so ``'one,two'`` becomes
    ``['one', 'two']``); any other value is passed to ``list()``, which
    yields a copy when the value already is a list.

    :param name: the setting name
    :type name: str

    :param default: the value to use if no setting is found; a falsy
        default is treated as an empty list
    :type default: object
    """
    found = self.get(name, default or [])
    return found.split(",") if isinstance(found, str) else list(found)
def getdict(
    self, name: _SettingsKeyT, default: dict[Any, Any] | None = None
) -> dict[Any, Any]:
    """
    Return a setting value as a new dictionary.

    A string is parsed with ``json.loads()`` first. The outcome is then
    passed to ``dict()``, so a dict is copied and a
    :class:`~scrapy.settings.BaseSettings` value is flattened to its plain
    values, dropping priority and mutability information.

    :param name: the setting name
    :type name: str

    :param default: the value to use if no setting is found; a falsy
        default is treated as an empty dict
    :type default: object
    """
    found = self.get(name, default or {})
    source = json.loads(found) if isinstance(found, str) else found
    return dict(source)
def getdictorlist(
    self,
    name: _SettingsKeyT,
    default: dict[Any, Any] | list[Any] | tuple[Any] | None = None,
) -> dict[Any, Any] | list[Any]:
    """Return a setting value as either a :class:`dict` or a :class:`list`.

    - ``None`` (after applying ``default``) gives an empty dict.
    - A tuple is converted to a list.
    - A dict or list is deep-copied.
    - A string is parsed as JSON; if that fails it is split on ``","``.
      So ``'{"key1": "value1"}'`` gives ``{'key1': 'value1'}``, while both
      ``'["one", "two"]'`` and ``'one,two'`` give ``['one', 'two']``.

    Values of any other type, and JSON that decodes to something other
    than a dict or list, trip an ``assert``.

    :param name: the setting name
    :type name: string

    :param default: the value to use if no setting is found
    :type default: any
    """
    found = self.get(name, default)
    if found is None:
        return {}
    if isinstance(found, tuple):
        return list(found)
    if not isinstance(found, str):
        assert isinstance(found, (dict, list))
        return copy.deepcopy(found)
    try:
        parsed = json.loads(found)
    except ValueError:
        # Not valid JSON: treat the string as a comma-separated list.
        return found.split(",")
    assert isinstance(parsed, (dict, list))
    return parsed
"""
获取_BASE设置中的设置值
"""
def getwithbase(self, name: _SettingsKeyT) -> BaseSettings:
    """Merge a dictionary-like setting with its ``_BASE`` counterpart.

    A new :class:`BaseSettings` is filled from ``name + "_BASE"`` first and
    from ``name`` second, so entries of the latter are applied last.

    :param name: name of the dictionary-like setting
    :type name: str
    :raises ValueError: if ``name`` is not a string
    """
    if not isinstance(name, str):
        raise ValueError(f"Base setting key must be a string, got {name}")
    merged = BaseSettings()
    for key in (name + "_BASE", name):
        merged.update(self[key])
    return merged
"""
获取属性的优先级
"""
def getpriority(self, name: _SettingsKeyT) -> int | None:
    """
    Return the numeric priority a setting is stored at, or ``None`` when no
    setting called ``name`` exists.

    :param name: the setting name
    :type name: str
    """
    return self.attributes[name].priority if name in self else None
"""
获取所有设置中的,最大的优先级
"""
def maxpriority(self) -> int:
    """
    Return the highest numeric priority found among the stored settings.

    With no settings stored, the numeric value of ``default`` from
    :attr:`~scrapy.settings.SETTINGS_PRIORITIES` is returned instead.
    """
    if not len(self):
        return get_settings_priority("default")
    return max(cast(int, self.getpriority(key)) for key in self)
"""
设置属性,衍生出了4个其他的方法,setdefault, update, setdict, setmodule
"""
def set(
    self, name: _SettingsKeyT, value: Any, priority: int | str = "project"
) -> None:
    """
    Store ``value`` under ``name`` at the given priority.

    Settings should be populated *before* configuring the Crawler object
    (through the :meth:`~scrapy.crawler.Crawler.configure` method),
    otherwise they won't have any effect.

    :param name: the setting name
    :type name: str

    :param value: the value to associate with the setting
    :type value: object

    :param priority: the priority of the setting. Should be a key of
        :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
    :type priority: str or int
    """
    # Refuse to touch a frozen object before doing anything else.
    self._assert_mutability()
    score = get_settings_priority(priority)
    if name in self:
        # Existing setting: the attribute decides, by priority, whether the
        # new value replaces the old one.
        self.attributes[name].set(value, score)
        return
    # New setting: a ready-made SettingsAttribute is stored as is, anything
    # else is wrapped in one first.
    if not isinstance(value, SettingsAttribute):
        value = SettingsAttribute(value, score)
    self.attributes[name] = value
"""
这里,没有值的话就直接设置值为default了,如果有的话,不设置,直接返回了原先的结果
"""
def setdefault(
    self,
    name: _SettingsKeyT,
    default: Any = None,
    priority: int | str = "project",
) -> Any:
    """Return the value of ``name``, storing ``default`` first if it is absent.

    An existing setting is returned untouched; otherwise ``default`` is
    stored at ``priority`` and returned.
    """
    if name in self:
        return self.attributes[name].value
    self.set(name, default, priority)
    return default
"""
先把values处理一下,最后再调用self.set
"""
def update(self, values: _SettingsInputT, priority: int | str = "project") -> None:  # type: ignore[override]
    """
    Store several key/value pairs at once.

    Every item of ``values`` is passed to
    :meth:`~scrapy.settings.BaseSettings.set`.

    - A string is assumed to be JSON and is decoded with ``json.loads()``
      first.
    - A :class:`~scrapy.settings.BaseSettings` instance contributes each
      key at the priority recorded for it, and ``priority`` is ignored.
      This allows inserting settings with different priorities in a single
      call.
    - ``None`` stores nothing.

    :param values: the settings names and values
    :type values: dict or string or :class:`~scrapy.settings.BaseSettings`

    :param priority: the priority of the settings. Should be a key of
        :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
    :type priority: str or int
    """
    # The signature accepts fewer input shapes than MutableMapping.update(),
    # hence the override pragma on the ``def`` line.
    self._assert_mutability()
    if isinstance(values, str):
        values = cast(dict[_SettingsKeyT, Any], json.loads(values))
    if values is None:
        return
    if isinstance(values, BaseSettings):
        for name, value in values.items():
            self.set(name, value, cast(int, values.getpriority(name)))
        return
    for name, value in values.items():
        self.set(name, value, priority)
"""
直接调用update
"""
def setdict(self, values: _SettingsInputT, priority: int | str = "project") -> None:
    """Store ``values`` at ``priority``; a plain pass-through to :meth:`update`."""
    self.update(values, priority)
"""
导入一个模块
"""
def setmodule(
    self, module: ModuleType | str, priority: int | str = "project"
) -> None:
    """
    Store the settings declared by a module.

    Every attribute of ``module`` whose name is upper case is passed to
    :meth:`~scrapy.settings.BaseSettings.set` with the given ``priority``.

    :param module: the module or the path of the module
    :type module: types.ModuleType or str

    :param priority: the priority of the settings. Should be a key of
        :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
    :type priority: str or int
    """
    # Refuse to touch a frozen object.
    self._assert_mutability()
    # A string is an import path; resolve it to the module object.
    source = import_module(module) if isinstance(module, str) else module
    # dir() lists the attribute names of the module object.
    for attr_name in dir(source):
        if not attr_name.isupper():
            continue
        self.set(attr_name, getattr(source, attr_name), priority)
# BaseSettings.update() doesn't support all inputs that MutableMapping.update() supports
"""
删除某个属性,但是会判断提供的分数,如果分数小于原先的分数,就不会删除
"""
def delete(self, name: _SettingsKeyT, priority: int | str = "project") -> None:
    """Remove ``name`` when ``priority`` is at least the priority it is stored at.

    A missing name raises ``KeyError``. A lower ``priority`` leaves the
    setting in place without raising.
    """
    if name not in self:
        raise KeyError(name)
    self._assert_mutability()
    requested = get_settings_priority(priority)
    stored = cast(int, self.getpriority(name))
    if requested < stored:
        return
    del self.attributes[name]
"""
直接调用深拷贝
"""
def copy(self) -> Self:
"""
Make a deep copy of current settings.
This method returns a new instance of the :class:`Settings` class,
populated with the same values and their priorities.
Modifications to the new object won't be reflected on the original
settings.
"""
return copy.deepcopy(self)
def freeze(self) -> None:
    """
    Mark these settings as immutable.

    Only the ``frozen`` flag is set here; the modifying methods check it
    through ``_assert_mutability()`` and refuse to run once it is set.
    """
    self.frozen = True
def frozencopy(self) -> Self:
    """
    Return an immutable copy of these settings.

    Shorthand for calling :meth:`~freeze` on the object returned by
    :meth:`copy`; the original stays as it was.
    """
    snapshot = self.copy()
    snapshot.freeze()
    return snapshot
"""
返回符合_SettingsKeyT设定类型的值
"""
def _get_key(self, key_value: Any) -> _SettingsKeyT:
return (
key_value
if isinstance(key_value, (bool, float, int, str, type(None)))
else str(key_value)
)
"""
转化成字典
"""
def _to_dict(self) -> dict[_SettingsKeyT, Any]:
return {
self._get_key(k): (v._to_dict() if isinstance(v, BaseSettings) else v)
for k, v in self.items()
}
"""
先复制,然后再转化为字典
"""
def copy_to_dict(self) -> dict[_SettingsKeyT, Any]:
    """
    Copy these settings and convert the copy to a dict.

    Because the dict is built from a copy, changes made to it are not
    reflected on the original settings.

    This can be useful, for example, for printing settings in Scrapy
    shell.
    """
    return self.copy()._to_dict()
# https://ipython.readthedocs.io/en/stable/config/integrating.html#pretty-printing
"""
pformat把一个对象转化为字符串
https://docs.python.org/zh-cn/3.9/library/pprint.html?highlight=pformat#pprint.pformat
"""
def _repr_pretty_(self, p: Any, cycle: bool) -> None:
if cycle:
p.text(repr(self))
else:
p.text(pformat(self.copy_to_dict()))
"""
就取值之后删掉这个属性
"""
def pop(self, name: _SettingsKeyT, default: Any = __default) -> Any:
try:
value = self.attributes[name].value
except KeyError:
if default is self.__default:
raise
return default
self.__delitem__(name)
return value
# 断言是否冻结
def _assert_mutability(self) -> None: # mutability 翻译:易变性
if self.frozen:
raise TypeError("Trying to modify an immutable Settings object") # 正在尝试修改不可变的设置对象
Settings继承自BaseSettings。在实例化Settings的时候,会优先把default_settings.py文件中的设置加载到settings实例中。(settings的初始化流程、自定义配置加载流程,见此文章)
class Settings(BaseSettings):
    """
    Settings object used to configure Scrapy's internal components, and
    available for any further customization.

    It is a direct subclass of :class:`~scrapy.settings.BaseSettings` and
    supports all of its methods. In addition, a new instance starts out
    populated with the global default settings described on
    :ref:`topics-settings-ref`.
    """

    def __init__(self, values: _SettingsInputT = None, priority: int | str = "project"):
        # ``values`` is deliberately not handed to the parent constructor:
        # user-defined dicts should not be promoted, and user values must
        # update the default dicts rather than replace them.
        super().__init__()
        # ``default_settings`` is the already-imported module object, loaded
        # at "default" priority.
        self.setmodule(default_settings, "default")
        # Promote dict-valued defaults to BaseSettings instances so that
        # each of their keys gets a priority of its own.
        for name, val in self.items():
            if not isinstance(val, dict):
                continue
            self.set(name, BaseSettings(val, "default"), "default")
        self.update(values, priority)
第五部分 打印自定义setting.py的修改内容
在scrapy的启动的时候,scrapy会打印出一个字典。列出了我们修改了哪些设置。这个功能就是这两个方法实现
"""
把默认的配置文件转化为一个生成器
"""
def iter_default_settings() -> Iterable[tuple[str, Any]]:
    """Yield a ``(name, value)`` pair for every upper-case attribute of the default settings module."""
    yield from (
        (attr, getattr(default_settings, attr))
        for attr in dir(default_settings)
        if attr.isupper()
    )
"""
用设置中的值与默认设置的生成器对比,找出不一样的设置。用来实现scrapy启动的时候,识别出修改了哪些默认设置的功能
"""
def overridden_settings(
    settings: Mapping[_SettingsKeyT, Any]
) -> Iterable[tuple[str, Any]]:
    """Yield ``(name, value)`` for every setting whose value differs from its default.

    Settings whose default is a dict are never reported.
    """
    for name, default_value in iter_default_settings():
        current = settings[name]
        if isinstance(default_value, dict):
            continue
        if current == default_value:
            continue
        yield name, current
标签:name,settings,default,self,value,priority,Scrapy,BaseSettings
From: https://blog.csdn.net/weixin_43471909/article/details/144742137