一、登录spark客户端
# Launch the spark-sql client on YARN in client deploy mode, queue "default",
# application name "wang". Driver: 12G. Executors: 10, each with 4 cores and 20G.
spark-sql \
  --master yarn --deploy-mode client \
  --queue default --name wang \
  --driver-memory 12G \
  --num-executors 10 --executor-cores 4 --executor-memory 20G
二、sql查询表信息
1、查询表快照信息
-- List the snapshots of the table, newest commit first.
-- SELECT * is deliberate here: this is ad-hoc inspection of a metadata table.
SELECT *
FROM spark_catalog.data_lake_ods.test_table.snapshots
ORDER BY committed_at DESC;

-- Count the snapshots currently listed in the snapshots metadata table.
SELECT COUNT(*)
FROM spark_catalog.data_lake_ods.test_table.snapshots;
2、查询删除文件
-- List the delete files recorded in the all_delete_files metadata table.
-- The catalog is spelled out so the query does not depend on the current catalog of the session.
SELECT *
FROM spark_catalog.data_lake_ods.test_table.all_delete_files;

-- Count those delete files.
SELECT COUNT(*)
FROM spark_catalog.data_lake_ods.test_table.all_delete_files;
三、表治理
1、小文件合并
-- Compact (merge) the data files of data_lake_ods.test_table.
-- Options, as described in the Iceberg rewrite_data_files documentation:
--   rewrite-all = true                      rewrite every data file, not only those picked by the size criteria
--   max-file-group-size-bytes = 1073741824  cap each file group at 1 GiB
--   rewrite-job-order = bytes-asc           process the smallest file groups first
--   partial-progress.enabled = true         commit file groups as they finish instead of in one final commit
--   partial-progress.max-commits = 10000    upper bound on the number of such partial commits
CALL spark_catalog.system.rewrite_data_files(
    table   => 'data_lake_ods.test_table',
    options => map(
        'rewrite-all',                  'true',
        'max-file-group-size-bytes',    '1073741824',
        'rewrite-job-order',            'bytes-asc',
        'partial-progress.enabled',     'true',
        'partial-progress.max-commits', '10000'
    )
);
2、过期快照清理
-- Expire the snapshots of data_lake_ods.test_table that are older than the cutoff below.
-- The cutoff is a hard-coded example value and should be adjusted before running.
-- NOTE(review): the literal is presumably interpreted in the session time zone, to be confirmed.
CALL spark_catalog.system.expire_snapshots(
    table      => 'data_lake_ods.test_table',
    older_than => TIMESTAMP '2024-04-08 10:00:00.000'
);
标签:iceberg,--,ods,lake,治理,常用命令,test,table,data From: https://www.cnblogs.com/robots2/p/18150197