======题目=============================
用户在某天刷题后第二天还会再来刷题的平均概率。请你取出相应数据
======oracle数据库=================
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (1, 2138, 111, 'wrong', to_date('03-05-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (2, 3214, 112, 'wrong', to_date('09-05-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (3, 3214, 113, 'wrong', to_date('15-06-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (4, 6543, 111, 'right', to_date('13-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (5, 2315, 115, 'right', to_date('13-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (6, 2315, 116, 'right', to_date('14-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (7, 2315, 117, 'wrong', to_date('15-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (8, 3214, 112, 'wrong', to_date('09-05-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (9, 3214, 113, 'wrong', to_date('15-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (10, 6543, 111, 'right', to_date('13-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (11, 2315, 115, 'right', to_date('13-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (12, 2315, 116, 'right', to_date('14-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (13, 2315, 117, 'wrong', to_date('15-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (14, 3214, 112, 'wrong', to_date('16-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (15, 3214, 113, 'wrong', to_date('18-08-2021', 'dd-mm-yyyy'));
insert into question_practice_detail (ID, DEVICE_ID, QUESTION_ID, RESULT, DATES) values (16, 6543, 111, 'right', to_date('13-08-2021', 'dd-mm-yyyy'));
======oracle数据库sql=================
select count(Main.device_id) , sum( case when Main.flag=1 then 1 else 0 end ) , sum( case when Main.flag=1 then 1 else 0 end ) / count(Main.device_id) from (
select
device_id,
dates,
lead(dates,1,null) over (partition by device_id order by dates) as date2 , lead(dates,1,null) over (partition by device_id order by dates)- dates flag
from (
select distinct device_id, dates
from question_practice_detail
)
)Main
======mysql数据库=================
CREATE TABLE `question_practice_detail` (
`id` int NOT NULL,
`device_id` int NOT NULL,
`question_id`int NOT NULL,
`result` varchar(32) NOT NULL,
`date` date NOT NULL
);
INSERT INTO question_practice_detail VALUES(1,2138,111,'wrong','2021-05-03');
INSERT INTO question_practice_detail VALUES(2,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(3,3214,113,'wrong','2021-06-15');
INSERT INTO question_practice_detail VALUES(4,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(5,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(6,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(7,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(8,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(9,3214,113,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(10,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(11,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(12,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(13,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(14,3214,112,'wrong','2021-08-16');
INSERT INTO question_practice_detail VALUES(15,3214,113,'wrong','2021-08-18');
INSERT INTO question_practice_detail VALUES(16,6543,111,'right','2021-08-13');
======mysql数据库sql=================
限定条件:第二天再来。
解法1:表里的数据可以看作是全部第一天来刷题了的,那么我们需要构造出第二天来了的字段,因此可以考虑用left join把第二天来了的拼起来,限定第二天来了的可以用date_add(date1, interval 1 day)=date2筛选,并用device_id限定是同一个用户。
解法2:用lead函数将同一用户连续两天的记录拼接起来。先按用户分组partition by device_id,再按日期升序排序order by date,再两两拼接(最后一个默认和null拼接),即lead(date) over (partition by device_id order by date)
平均概率:
解法1:可以count(date1)得到左表全部的date记录数作为分母,count(date2)得到右表关联上了的date记录数作为分子,相除即可得到平均概率
解法2:检查date2和date1的日期差是不是为1,是则为1(次日留存了),否则为0(次日未留存),取avg即可得平均概率。
附:lead用法,date_add用法,datediff用法,date函数
SELECT
COUNT(q2.device_id) / COUNT(q1.device_id) AS avg_ret
FROM
(SELECT DISTINCT device_id, date FROM question_practice_detail)as q1
LEFT JOIN
(SELECT DISTINCT device_id, date FROM question_practice_detail) AS q2
ON q1.device_id = q2.device_id AND q2.date = DATE_ADD(q1.date, interval 1 day)