表的结构是这样的,时间的范围我是提取了几个小时内的数据
create table `alibaba.user_bea` (
user_id bigint,
item_id bigint,
cate_id bigint,
times string,
bea int
)
PARTITIONED BY (dt STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS orc;
提取时间
select item_id,ranks from (Select item_id, from_unixtime(cast(times as bigint),'yyyyMMdd HH') as times,row_number() over (partition by times,cate_id order by count(*) desc )as ranks from user_bea group by cate_id ,times,item_id)t1 where ranks<=3;
数据下载地址 导入方法如下
#hdfs路径
load data inpath "/flinkworkspace/work/input.har/part-0" overwrite into table user_bea_tmp;
#本地路径
load data local inpath "/home/lin/part-0" overwrite into table user_bea_tmp;