为了避免全表扫描、优化查询性能,我们可以使用分区表和分桶表对数据进行细分。分区是指定表字段之外的字段(分区字段,如下文的 dt)来划分数据;分桶表是分区表的进阶,使用表自身的字段(如下文的 user_id)对数据做进一步细分。
分区表
-- Partitioned orders table (Hive DDL).
-- Rows are split by the partition column dt, which is declared in
-- PARTITIONED BY rather than in the regular column list. Queries that
-- filter on dt only need to read the matching partitions.
-- The database and table names are quoted separately, because a single
-- quoted identifier containing a dot can be read as one table name.
-- There is no ROW FORMAT DELIMITED clause: ORC is written through its own
-- SerDe, so a text field delimiter would have no effect on stored rows.
-- NOTE(review): times is declared as STRING and looks like a time value
-- kept as text. Confirm the expected format with the data producers.
CREATE TABLE `alibaba`.`orders` (
    order_id BIGINT,
    user_id BIGINT,
    item_id BIGINT,
    times STRING,
    name STRING,
    address STRING,
    phone STRING,
    keyword INT
)
PARTITIONED BY (dt STRING)
STORED AS ORC;
分桶表
-- Bucketed user table (Hive DDL).
-- Rows are distributed into 4 buckets by user_id, an ordinary column of
-- the table, and rows inside each bucket are kept sorted by user_id in
-- descending order.
-- The database and table names are quoted separately, because a single
-- quoted identifier containing a dot can be read as one table name.
-- There is no ROW FORMAT DELIMITED clause: ORC is written through its own
-- SerDe, so a text field delimiter would have no effect on stored rows.
-- NOTE(review): older Hive releases presumably need bucketing enforcement
-- enabled at insert time for the bucket layout to be honoured. Verify
-- against the Hive version in use.
CREATE TABLE `alibaba`.`userInfo` (
    order_id BIGINT,
    user_id BIGINT,
    address STRING,
    phone STRING
)
CLUSTERED BY (user_id) SORTED BY (user_id DESC) INTO 4 BUCKETS
STORED AS ORC;