dbt dbt-audit-helper 包在进行compare_relation_columns处理的时候进行数据表列字段创建顺序的判断
参考使用
我按照test 处理的,同时进行的测试异常进行存储
- 使用
{{ audit_helper.compare_relation_columns(
a_relation = source("dalongdemo","mytest_appv2"),
b_relation = source("dalongdemo","mytest_appv3")
)}}
- 存储效果
内部处理参考
- 参考macro 定义
{{ return(adapter.dispatch('compare_relation_columns', 'audit_helper')(a_relation, b_relation)) }}
{% endmacro %}
{% macro default__compare_relation_columns(a_relation, b_relation) %}
# 自己包装的,实际内部默认使用了adapter 的get_columns_in_relation,但是对于不同数据库可能会有差异,基于dispatch 模式进行了处理
with a_cols as (
{{ audit_helper.get_columns_in_relation_sql(a_relation) }}
),
b_cols as (
{{ audit_helper.get_columns_in_relation_sql(b_relation) }}
)
# 通过full outer join 进行实际上的判断处理,会结合类型,名称,以及字段创建的顺序
select
column_name,
a_cols.ordinal_position as a_ordinal_position,
b_cols.ordinal_position as b_ordinal_position,
a_cols.data_type as a_data_type,
b_cols.data_type as b_data_type,
coalesce(a_cols.ordinal_position = b_cols.ordinal_position, false) as has_ordinal_position_match,
coalesce(a_cols.data_type = b_cols.data_type, false) as has_data_type_match,
a_cols.data_type is not null and b_cols.data_type is null as in_a_only,
b_cols.data_type is not null and a_cols.data_type is null as in_b_only,
b_cols.data_type is not null and a_cols.data_type is not null as in_both
from a_cols
full outer join b_cols using (column_name)
order by coalesce(a_cols.ordinal_position, b_cols.ordinal_position)
{% endmacro %}
{% macro get_columns_in_relation_sql(relation) %}
{{ adapter.dispatch('get_columns_in_relation_sql', 'audit_helper')(relation) }}
{% endmacro %}
{% macro default__get_columns_in_relation_sql(relation) %}
{% set columns = adapter.get_columns_in_relation(relation) %}
{% for column in columns %}
select
{{ dbt.string_literal(column.name) }} as column_name,
{{ loop.index }} as ordinal_position,
{{ dbt.string_literal(column.data_type) }} as data_type
{% if not loop.last -%}
union all
{%- endif %}
{% endfor %}
{% endmacro %}
{% macro redshift__get_columns_in_relation_sql(relation) %}
{# You can't store the results of an info schema query to a table/view in Redshift, because the data only lives on the leader node #}
{{ return (audit_helper.default__get_columns_in_relation_sql(relation)) }}
{% endmacro %}
{% macro snowflake__get_columns_in_relation_sql(relation) %}
{#-
From: https://github.com/dbt-labs/dbt/blob/dev/louisa-may-alcott/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql#L48
Edited to include ordinal_position
-#}
select
ordinal_position,
column_name,
data_type,
character_maximum_length,
numeric_precision,
numeric_scale
from
{{ relation.information_schema('columns') }}
where table_name ilike '{{ relation.identifier }}'
{% if relation.schema %}
and table_schema ilike '{{ relation.schema }}'
{% endif %}
{% if relation.database %}
and table_catalog ilike '{{ relation.database }}'
{% endif %}
order by ordinal_position
{% endmacro %}
{% macro postgres__get_columns_in_relation_sql(relation) %}
{#-
From: https://github.com/dbt-labs/dbt/blob/23484b18b71010f701b5312f920f04529ceaa6b2/plugins/postgres/dbt/include/postgres/macros/adapters.sql#L32
Edited to include ordinal_position
-#}
select
ordinal_position,
column_name,
data_type,
character_maximum_length,
numeric_precision,
numeric_scale
from {{ relation.information_schema('columns') }}
where table_name = '{{ relation.identifier }}'
{% if relation.schema %}
and table_schema = '{{ relation.schema }}'
{% endif %}
order by ordinal_position
{% endmacro %}
{% macro bigquery__get_columns_in_relation_sql(relation) %}
select
ordinal_position,
column_name,
data_type
from `{{ relation.database }}`.`{{ relation.schema }}`.INFORMATION_SCHEMA.COLUMNS
where table_name = '{{ relation.identifier }}'
{% endmacro %}
说明
dbt dbt-audit-helper 包compare_relation_columns 内部处理实际上使用了还是标准INFORMATION_SCHEMA.COLUMNS 的能力,只是
包装了公共方法使用上更加方便了
参考资料
https://github.com/dbt-labs/dbt-audit-helper?tab=readme-ov-file#compare_relation_columns-source
标签:audit,compare,type,cols,relation,data,dbt,columns From: https://www.cnblogs.com/rongfengliang/p/18167940