hive 元数据的数据模型
hive元数据的数据模型.png
hive元数据的常用查询语句
通过字段中文注释找表
-- Find tables by a keyword in a column comment.
-- Selects the database name, table name, table comment, column name,
-- column type, column comment and column position of every matching column.
-- Replace '%xxxx%' with the keyword to search for.
select
    d.NAME,
    a.TBL_NAME,
    e.PARAM_VALUE,
    c.COLUMN_NAME,
    c.TYPE_NAME,
    c.COMMENT,
    c.INTEGER_IDX
from TBLS a
join SDS b
    on a.SD_ID = b.SD_ID
join COLUMNS_V2 c
    on c.CD_ID = b.CD_ID
join DBS d
    on a.DB_ID = d.DB_ID
-- LEFT JOIN: a table that has no 'comment' entry in TABLE_PARAMS must still
-- be listed when one of its columns matches; PARAM_VALUE is NULL in that case.
left join TABLE_PARAMS e
    on a.TBL_ID = e.TBL_ID
    and e.PARAM_KEY = 'comment'
where c.COMMENT like '%xxxx%' -- keyword matched against the column comment
order by a.TBL_NAME, c.INTEGER_IDX
根据表中文注释找表
-- Find tables in one database by a keyword in the table comment.
-- Replace '%xxx%' with the keyword and 'dw_idl' with the database to search.
select
    d.NAME,
    a.TBL_NAME,
    b.PARAM_VALUE
from TBLS a
inner join TABLE_PARAMS b
    on a.TBL_ID = b.TBL_ID
inner join DBS d
    on a.DB_ID = d.DB_ID
where b.PARAM_KEY = 'comment'
    and b.PARAM_VALUE like '%xxx%' -- keyword matched against the table comment
    and d.NAME = 'dw_idl'
order by a.TBL_NAME
查找该表授权的视图
-- Find views whose original definition text contains a keyword (for example
-- a table name). Replace '%xxx%' with the text to search for.
select
    d.NAME,
    a.TBL_NAME
from TBLS a
join DBS d
    on a.DB_ID = d.DB_ID
-- Single-quoted literal: a double-quoted "%xxx%" is parsed as an identifier
-- in standard SQL and when the MySQL ANSI_QUOTES sql_mode is enabled.
where a.VIEW_ORIGINAL_TEXT like '%xxx%'
分区分桶表元数据查询
-- List table/view metadata together with grant, partition-key and
-- bucketing-column details.
-- NOTE(review): TBL_PRIVS is inner-joined, so only tables that have at least
-- one TBL_PRIVS row are returned; confirm this is intended.
-- NOTE(review): CDS is joined but no CDS column is selected.
SELECT
    db.NAME AS '数据库名',
    t.TBL_NAME AS '表/视图名',
    comm.PARAM_VALUE AS '表注释',
    FROM_UNIXTIME(t.CREATE_TIME, '%Y-%m-%d') AS '表创建时间',
    t.OWNER AS '表属主用户',
    FROM_UNIXTIME(priv.CREATE_TIME, '%Y-%m-%d') AS '表被授权时间',
    priv.PRINCIPAL_NAME AS '被授权用户',
    pk.PKEY_NAME AS '分区字段',
    bc.BUCKET_COL_NAME AS '分桶字段',
    sd.NUM_BUCKETS AS '分桶数',
    t.LAST_ACCESS_TIME AS '最近一次访问时间' -- returned as stored, not formatted
FROM TBLS AS t
INNER JOIN DBS AS db
    ON t.DB_ID = db.DB_ID
INNER JOIN SDS AS sd
    ON t.SD_ID = sd.SD_ID
INNER JOIN CDS AS cd
    ON sd.CD_ID = cd.CD_ID
INNER JOIN TBL_PRIVS AS priv
    ON t.TBL_ID = priv.TBL_ID
-- Outer joins below: tables without partition keys, bucketing columns or a
-- table comment are kept, with NULL in the corresponding columns.
LEFT JOIN PARTITION_KEYS AS pk
    ON pk.TBL_ID = t.TBL_ID
LEFT JOIN BUCKETING_COLS AS bc
    ON t.SD_ID = bc.SD_ID
LEFT JOIN TABLE_PARAMS AS comm
    ON comm.TBL_ID = t.TBL_ID
    AND comm.PARAM_KEY = 'comment'
统计各库表数量
-- Count the TBLS entries in each database, largest count first.
select
    d.NAME,
    count(*) as cnt
from DBS d
inner join TBLS a
    on d.DB_ID = a.DB_ID
group by d.NAME
order by cnt desc
统计某一时间后未变化过的表
-- Count, per database, the tables whose 'transient_lastDdlTime' parameter is
-- earlier than a cutoff, i.e. tables that have not changed since that time.
-- The cutoff 1577808000 is epoch seconds for 2020-01-01 00:00:00 at UTC+8
-- (2019-12-31 16:00:00 UTC); replace it with the desired cutoff.
select
    DB_NAME,
    count(*)
from (
    -- Per-table detail rows; run this inner query on its own to list the tables.
    select
        b.NAME as DB_NAME,
        a.TBL_NAME,
        c.PARAM_VALUE as comment,
        from_unixtime(cast(a.CREATE_TIME as SIGNED)) as CREATE_TIME,
        from_unixtime(cast(d.PARAM_VALUE as SIGNED)) as last_modified_time,
        from_unixtime(cast(e.PARAM_VALUE as SIGNED)) as transient_lastDdlTime
    from TBLS a
    join DBS b
        on a.DB_ID = b.DB_ID
    join TABLE_PARAMS e
        on a.TBL_ID = e.TBL_ID
        and e.PARAM_KEY = 'transient_lastDdlTime'
        -- Compare numerically: PARAM_VALUE is text, and a string comparison
        -- orders values lexicographically, which is wrong whenever the two
        -- sides have a different number of digits.
        and cast(e.PARAM_VALUE as SIGNED) < 1577808000
    -- Optional parameters: keep the table even when they are absent.
    left join TABLE_PARAMS c
        on a.TBL_ID = c.TBL_ID
        and c.PARAM_KEY = 'comment'
    left join TABLE_PARAMS d
        on a.TBL_ID = d.TBL_ID
        and d.PARAM_KEY = 'last_modified_time'
) f
group by DB_NAME