第六题:统计每个成绩段人数
create table sql1_6(
str string
);
load data local inpath '/home/homedata/sql_1/sql1_6.txt' into table sql1_6;
{"username":"张三","score":95}
{"username":"李四","score":76}
{"username":"赵本山","score":92}
{"username":"王五","score":76}
{"username":"赵六","score":62}
{"username":"赵六1","score":62}
{"username":"赵六2","score":26}
{"username":"赵六3","score":89}
{"username":"赵六4","score":77}
主要是函数get_json_object的使用
with t1 as (
select case when get_json_object(str,'$.score') >90 then '优'
when get_json_object(str,'$.score') >80 then '良'
when get_json_object(str,'$.score') >60 then '中'
else '差' end grade
from sql1_6
)
select grade,count(*) from t1 group by grade;
题目七:支付次数和累计充值金额统计
create table sql1_7(
user_id string,
date_dt string,
trans_type string,
trans_amt double,
trans_dc string
)row format delimited fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_7.txt' into table sql1_7;
u_001,20191201,支付,20000,支
u_001,20191203,充值,30010,收
u_001,20191203,提现,50000,支
u_001,20191208,支付,20000,支
u_001,20191210,充值,30010,收
u_001,20191220,提现,50000,支
u_002,20191202,支付,20000,支
u_002,20191202,转入,30010,收
u_002,20191230,充值,50000,收
u_003,20200110,支付,60.68,支
u_004,20200111,支付,90.05,支
u_004,20200114,充值,100.1,收
u_005,20200101,还款,30010,支
求每个月用户的支付次数(trans_dc为支的数据)大于3次,且每月累计充值金额大于10000元的用户交易记录。
with t1 as (
select user_id,substr(date_dt,1,6) dt,trans_dc ,count(*) count from sql1_7 group by user_id,trans_dc,substr(date_dt,1,6)
),
t2 as (
select user_id,dt from t1 where count >3 and trans_dc ='支'
),
t3 as (
select t2.user_id user_id,dt,trans_amt from t2 left join sql1_7 on t2.user_id = sql1_7.user_id and dt = substr(date_dt,1,6) and trans_type = '充值'
)
select user_id,dt from t3 group by user_id,dt having sum(trans_amt) >10000;
题目八:对于订单数据统计
create table sql1_8
(
order_id string, --订单id
user_id string, --用户id
user_name string, --用户姓名
order_date string, --下单日期
order_amount int --订单金额
);
insert overwrite table sql1_8
values('1','1001','小元','2022-01-01','10'),
('2','1002','小海','2022-01-02','15'),
('3','1001','小元','2022-02-03','23'),
('4','1002','小海','2022-01-04','29'),
('5','1001','小元','2022-01-05','46'),
('6','1001','小元','2022-04-06','42'),
('7','1002','小海','2022-01-07','50'),
('8','1001','小元','2022-01-08','50'),
('9','1003','小辉','2022-04-08','62'),
('10','1003','小辉','2022-04-09','62'),
('11','1004','小猛','2022-05-10','12'),
('12','1003','小辉','2022-04-11','75'),
('13','1004','小猛','2022-06-12','80'),
('14','1003','小辉','2022-04-13','94');
select * from sql1_8;
-- 1.统计每个用户截至每次下单的累积下单总额
select *,sum(order_amount) over (partition by user_id order by order_date) sumMoney from sql1_8;
-- 2.统计每个用户截至每次下单的当月累积下单总额
select *,
sum(order_amount) over (partition by user_id ,month(order_date) order by order_date )
from sql1_8;
-- 3.统计每个用户每次下单距离上次下单相隔的天数(首次下单按0天算)
select *,datediff(order_date,lag(order_date,1,order_date) over (partition by user_id order by order_date )) days from sql1_8;
-- 4.为每个用户的所有下单记录按照订单金额进行排名
select *,dense_rank() over (partition by user_id order by order_amount desc ) num from sql1_8;
题目九:网站登录时间间隔统计
create table sql1_9 (
user_id string,
dt string
);
INSERT INTO
sql1_9(user_id,dt)
VALUEs('a001','2021-01-01')
,('b001','2021-01-01')
,('a001','2021-01-03')
,('a001','2021-01-06')
,('a001','2021-01-07')
,('b001','2021-01-07')
,('a001','2021-01-08')
,('a001','2021-01-09')
,('b001','2021-01-09')
,('b001','2021-01-10')
,('b001','2021-01-15')
,('a001','2021-01-16')
,('a001','2021-01-18')
,('a001','2021-01-19')
,('b001','2021-01-20')
,('a001','2021-01-23');
select * from sql1_9;
计算每个用户登录日期间隔小于5天的次数。——主要是lag函数的使用,只需要计算这次和上次登录的时间差,然后汇总即可。
with t1 as (
select *,datediff(dt,lag(dt,1,dt) over (partition by user_id order by dt ) )-1 as days from sql1_9
)select user_id,count(*) count from t1 where days <5 and days !=-1 group by user_id;
题目十:蚂蚁森林,连续问题
create table sql1_10(
id int,
dt string,
num int
)
row format delimited
fields terminated by ' ';
load data local inpath '/home/homedata/sql_1/sql1_10.txt' into table sql1_10;
id dt lowcarbon
1001 2021-12-12 123
1002 2021-12-12 45
1001 2021-12-13 43
1001 2021-12-13 45
1001 2021-12-13 23
1002 2021-12-14 45
1001 2021-12-14 230
1002 2021-12-15 45
1001 2021-12-15 23
找出连续 3 天及以上总共减少碳排放量在 100 以上的,这个题同样是连续问题,只需要通过row_number进行排序,然后使用日期减去这个排名,日期相同的即为同一组,进行汇总即可。
with t1 as (
select id,dt,sum(num) sum,row_number() over (partition by id order by dt) num from sql1_10 group by id ,dt
)select id from t1 group by id,date_sub(dt,num) having sum(sum) > 100 and count(*) >= 3;
标签:10,01,专场,sql1,2021,sql,dt,id,user
From: https://blog.csdn.net/weixin_52642840/article/details/143455127