dbt query_header 简单说明

标签：comment self header str sql query dbt

dbt 会为每条实际执行的 SQL 附加一段查询注释（query comment），可以方便地查看 dbt 版本、profile、target 以及执行的 node_id 等信息

参考格式

/* {"app": "dbt", "dbt_version": "1.5.11", "profile_name": "dremio_nessie", "target_name": "dev", "node_id": "model.dremio_demo_app.my_mydemoapp"} */

因为 SQL 最终是通过数据库连接执行的，所以 query_header 是在 connections（连接管理）内部处理的

def __init__(self, profile: AdapterRequiredConfig, mp_context: SpawnContext) -> None:
    """Initialise the connection-level state held by this object.

    :param profile: configuration object; stored as ``self.profile``.
    :param mp_context: multiprocessing context used only to create the
        re-entrant lock stored as ``self.lock``.
    """
    self.profile = profile
    # Starts empty; maps a hashable key to its Connection
    # (presumably one entry per thread -- confirm against the callers).
    self.thread_connections: Dict[Hashable, Connection] = dict()
    manager_lock: RLock = mp_context.RLock()
    self.lock = manager_lock
    # No query header until one is explicitly installed on this object.
    self.query_header: Optional[MacroQueryStringSetter] = None

def set_query_header(self, query_header_context: Dict[str, Any]) -> None:
    """Create the query-header setter and store it on ``self.query_header``.

    :param query_header_context: forwarded, together with ``self.profile``,
        to ``MacroQueryStringSetter``.
    """
    header_setter = MacroQueryStringSetter(self.profile, query_header_context)
    self.query_header = header_setter

class MacroQueryStringSetter:
    """Render the configured query-comment macro and attach it to SQL text.

    On construction the comment template is read from
    ``config.query_comment.comment``. When it is non-empty it is wrapped in a
    ``query_comment_macro(connection_name, node)`` Jinja macro and handed to
    ``QueryStringGenerator``; otherwise a generator that always returns the
    empty string is used. The rendered comment is kept in a ``_QueryComment``
    instance and applied to SQL through :meth:`add`.

    NOTE(review): ``set_query_header`` in this file passes an already
    generated context dict as the second argument, whereas this class stores
    it as ``manifest`` and calls ``generate_query_header_context`` on it
    again. The excerpts may come from different dbt versions -- confirm.
    """

    def __init__(self, config: AdapterRequiredConfig, manifest: Manifest):
        """Build the comment generator and initialise the current comment.

        :param config: configuration providing ``query_comment``.
        :param manifest: passed to ``generate_query_header_context`` when the
            macro rendering context is built.
        """
        self.manifest = manifest
        self.config = config

        # The comment is implemented as a macro; stepping through with a
        # debugger also shows a dedicated query-comment macro being rendered.
        comment_macro = self._get_comment_macro()
        self.generator: QueryStringFunc = lambda name, model: ""

        # if the comment value was None or the empty string, just skip it
        if comment_macro:
            assert isinstance(comment_macro, str)
            # str.join accepts exactly one iterable, so the three pieces are
            # passed as a single tuple.
            macro = "\n".join(
                (
                    "{%- macro query_comment_macro(connection_name, node) -%}",
                    comment_macro,
                    "{% endmacro %}",
                )
            )
            ctx = self._get_context()
            self.generator = QueryStringGenerator(macro, ctx)

        self.comment = _QueryComment(None)
        self.reset()

    def _get_comment_macro(self) -> Optional[str]:
        """Return the raw comment template from the configuration."""
        return self.config.query_comment.comment

    def _get_context(self) -> Dict[str, Any]:
        """Return the context dict used to render the comment macro."""
        return generate_query_header_context(self.config, self.manifest)

    def add(self, sql: str) -> str:
        """Return ``sql`` with the current query comment applied."""
        return self.comment.add(sql)

    def reset(self):
        """Regenerate the comment for the connection name "master" with no node."""
        self.set("master", None)

    def set(self, name: str, node: Optional[ResultNode]):
        """Render the comment for ``name``/``node`` and store it.

        :param name: connection name passed to the comment generator.
        :param node: node to describe in the comment, or ``None``; a node is
            wrapped in ``NodeWrapper`` before being passed to the generator.
        """
        wrapped: Optional[NodeWrapper] = None
        if node is not None:
            wrapped = NodeWrapper(node)
        comment_str = self.generator(name, wrapped)

        # The append flag is only honoured when query_comment is a
        # QueryComment instance; otherwise the comment is prepended.
        append = False
        if isinstance(self.config.query_comment, QueryComment):
            append = self.config.query_comment.append
        self.comment.set(comment_str, append)

class _QueryComment(local):
    """A thread-local class storing thread-specific state information for
    connection management, namely:

        - ``query_comment``: the current thread's query comment, or ``None``.
        - ``append``: whether the comment goes after the SQL (``True``) or
          before it (``False``).
    """

    def __init__(self, initial) -> None:
        """Start with ``initial`` as the comment and prepend mode."""
        self.query_comment: Optional[str] = initial
        self.append: bool = False

    def add(self, sql: str) -> str:
        """Return ``sql`` with the stored comment attached.

        With no comment (``None`` or empty string) ``sql`` is returned
        unchanged. In prepend mode the result is ``/* comment */`` followed by
        a newline and ``sql``. In append mode trailing whitespace is stripped
        from ``sql`` and the comment is placed on a new line after it; a
        trailing ``;`` is moved behind the comment so it stays the final
        character.
        """
        if not self.query_comment:
            return sql

        comment = "/* {} */".format(self.query_comment.strip())
        if not self.append:
            return "{}\n{}".format(comment, sql)

        # replace last ';' with '<comment>;'
        sql = sql.rstrip()
        # endswith() instead of sql[-1]: the statement may be empty or
        # whitespace-only here, and indexing would raise IndexError.
        if sql.endswith(";"):
            return "{}\n{};".format(sql[:-1], comment)
        return "{}\n{}".format(sql, comment)

    def set(self, comment: Optional[str], append: bool):
        """Store a new comment and placement mode.

        :param comment: comment text, or ``None`` for no comment.
        :param append: ``True`` to place the comment after the SQL.
        :raises DbtRuntimeError: if ``comment`` contains ``*/``, which would
            close the SQL block comment early.
        """
        if isinstance(comment, str) and "*/" in comment:
            # tell the user "no" so they don't hurt themselves by writing
            # garbage
            raise DbtRuntimeError(f'query comment contains illegal value "*/": {comment}')
        self.query_comment = comment
        self.append = append

class AdapterRequiredConfig(HasCredentials, Protocol):
    """Protocol listing the configuration attributes adapter code relies on.

    Only attribute declarations are given here; the class has no behaviour
    of its own.
    """

    # Read by QueryHeaderContext when it initialises its base class.
    project_name: str

    # Query-comment settings; MacroQueryStringSetter reads ``.comment`` and,
    # when the value is a QueryComment instance, ``.append``.
    query_comment: QueryComment

    cli_vars: Dict[str, Any]

    target_path: str

    log_cache_events: bool

# Jinja template for a query comment (presumably the default used when the
# project does not configure its own -- confirm). It builds a dict with
# ``app``, ``dbt_version``, ``profile_name`` and ``target_name``, adds
# ``node_id`` when ``node`` is set or ``connection_name`` otherwise, and
# returns the dict serialised with ``tojson``.
# The text between the triple quotes is a runtime string and is used as-is.
DEFAULT_QUERY_COMMENT = """

{%- set comment_dict = {} -%}

{%- do comment_dict.update(

    app='dbt',

    dbt_version=dbt_version,

    profile_name=target.get('profile_name'),

    target_name=target.get('target_name'),

) -%}

{%- if node is not none -%}

  {%- do comment_dict.update(

    node_id=node.unique_id,

  ) -%}

{% else %}

  {# in the node context, the connection name is the node_id #}

  {%- do comment_dict.update(connection_name=connection_name) -%}

{%- endif -%}

{{ return(tojson(comment_dict)) }}

"""

 def execute(
        self,
        sql: str,
        auto_begin: bool = False,
        fetch: bool = False,
        limit: Optional[int] = None,
    ) -> Tuple[AdapterResponse, agate.Table]:
        """Run ``sql`` with the query comment applied and return the outcome.

        :param sql: statement to execute; it is passed through
            ``self._add_query_comment`` first.
        :param auto_begin: forwarded to ``self.add_query``.
        :param fetch: when true, rows are read from the cursor; otherwise an
            empty table is returned.
        :param limit: forwarded to ``self.get_result_from_cursor`` when
            ``fetch`` is true.
        :return: ``(response, table)`` where ``response`` comes from
            ``self.get_response(cursor)``.
        """
        commented_sql = self._add_query_comment(sql)
        _, cursor = self.add_query(commented_sql, auto_begin)
        response = self.get_response(cursor)
        table = self.get_result_from_cursor(cursor, limit) if fetch else empty_table()
        return response, table

class QueryHeaderContext(ManifestContext):
    """Manifest context used when rendering the query header.

    It adds nothing to ``ManifestContext`` except fixing the third
    constructor argument to the config's own ``project_name``.
    """

    def __init__(self, config: AdapterRequiredConfig, manifest: Manifest) -> None:
        project_name = config.project_name
        super().__init__(config, manifest, project_name)

def generate_query_header_context(config: AdapterRequiredConfig, manifest: Manifest):
    """Return the query-header context as a dict.

    Builds a ``QueryHeaderContext`` from ``config`` and ``manifest`` and
    returns the result of its ``to_dict()``.
    """
    return QueryHeaderContext(config, manifest).to_dict()

query header 的 context 是在 dbt cli 的 context 处理装饰器中生成并传递给 adapter 的（core 的 requires 模块）

register_adapter(runtime_config, get_mp_context())

          adapter = get_adapter(runtime_config)

          adapter.set_macro_context_generator(generate_runtime_macro_context)

          adapter.set_macro_resolver(ctx.obj["manifest"])

          query_header_context = generate_query_header_context(

              adapter.config, ctx.obj["manifest"]

          adapter.connections.set_query_header(query_header_context)

      return func(*args, **kwargs)

dbt 的查询注释还是比较方便的，可以用来查看和分析执行信息；它实际上是基于 macro 生成的 sql 查询注释

dbt/adapters/base/connections.py
dbt/adapters/base/query_headers.py
core/dbt/cli/requires.py

标签：comment,self,header,str,sql,query,dbt
From： https://www.cnblogs.com/rongfengliang/p/18105085