MySQL实战--用户行为数据分析--8小时跟做（3）

标签：数据分析 category name -- int behavior MySQL type id

RFM模型_哔哩哔哩_bilibili

8 RFM模型

{最近消费（Recency）、消费频率（Frequency）、消费金额（Monetary）三个指标）对用户黏性、忠诚度和收入这三个维度进行数值定量分析，然后和平均数（中位数）对比得到定性描述（高或低）

--存储
create table rfm_model(
user_id int,
frequency int,
recent char(10)
);

insert into rfm_model
select user_id
,count(user_id) '购买次数'
,max(dates) '最近购买时间'
from user_behavior
where behavior_type='buy'
group by user_id
order by 2 desc,3 desc;

分层

-- 根据购买次数对用户进行分层 
alter table rfm_model add column fscore int;

update rfm_model
set fscore = case
when frequency between 100 and 262 then 5
when frequency between 50 and 99 then 4
when frequency between 20 and 49 then 3
when frequency between 5 and 20 then 2
else 1
end

-- 根据最近购买时间对用户进行分层 
alter table rfm_model add column rscore int;

update rfm_model
set rscore = case
when recent = '2017-12-03' then 5
when recent in ('2017-12-01','2017-12-02') then 4
when recent in ('2017-11-29','2017-11-30') then 3
when recent in ('2017-11-27','2017-11-28') then 2
else 1
end

select * from rfm_model；

使用alter添加列到表。

在SQL中，ALTER语句用于对现有数据库表进行修改。

--add 添加新列
ALTER TABLE table_name
ADD column_name datatype;

--删除列
ALTER TABLE table_name
DROP COLUMN column_name;

--修改列的数据类型
ALTER TABLE table_name
ALTER COLUMN column_name datatype;

--重命名表
ALTER TABLE table_name
RENAME TO new_table_name;

--添加约束
ALTER TABLE table_name
ADD CONSTRAINT constraint_name constraint_definition;

--删除约束
ALTER TABLE table_name
DROP CONSTRAINT constraint_name;

--修改列的默认值
ALTER TABLE table_name
ALTER COLUMN column_name SET DEFAULT default_value;

--重命名列
ALTER TABLE table_name
RENAME COLUMN column_name TO new_column_name;

用平均值分层

-- 分层：定义两个用户变量存储他们的平均值，用平均值分层用户
set @f_avg=null;
set @r_avg=null;
select avg(fscore) into @f_avg from rfm_model;
select avg(rscore) into @r_avg from rfm_model;

select *
,(case
when fscore>@f_avg and rscore>@r_avg then '价值用户'
when fscore>@f_avg and rscore<@r_avg then '保持用户'
when fscore<@f_avg and rscore>@r_avg then '发展用户'
when fscore<@f_avg and rscore<@r_avg then '挽留用户'
end) class
from rfm_model

其中，设置变量存储平均值，myql中变量的设置，下面是用法

SET @variable_name = value;

SELECT column_name INTO variable_name 
FROM table_name 
WHERE condition;

将结果插入表并统计各区用户数

-- 将结果插入表中
alter table rfm_model add column class varchar(40);
update rfm_model
set class = case
when fscore>@f_avg and rscore>@r_avg then '价值用户'
when fscore>@f_avg and rscore<@r_avg then '保持用户'
when fscore<@f_avg and rscore>@r_avg then '发展用户'
when fscore<@f_avg and rscore<@r_avg then '挽留用户'
end


-- 统计各分区用户数
select class,count(user_id) from rfm_model
group by class

9 商品按热度分类

使用窗口函数对商品排序


create table popular_categories(
category_id int,
pv int);
create table popular_items(
item_id int,
pv int);
create table popular_cateitems(
category_id int,
item_id int,
pv int);


insert into popular_categories
select category_id
,count(if(behavior_type='pv',behavior_type,null)) '品类浏览量'
from user_behavior
GROUP BY category_id
order by 2 desc
limit 10;


insert into popular_items
select item_id
,count(if(behavior_type='pv',behavior_type,null)) '商品浏览量'
from user_behavior
GROUP BY item_id
order by 2 desc
limit 10;

insert into popular_cateitems
select category_id,item_id,
品类商品浏览量 from
(
select category_id,item_id
,count(if(behavior_type='pv',behavior_type,null)) '品类商品浏览量'
,rank()over(partition by category_id order by '品类商品浏览量' desc) r
from user_behavior
GROUP BY category_id,item_id
order by 3 desc
) a
where a.r = 1
order by a.品类商品浏览量 desc
limit 10

通俗易懂的学会：SQL窗口函数

子查询和表连接

由于我的MySQL是5.7版本,在MySQL中，窗口函数是在MySQL 8.0版本引入的新功能。所以一直报错。接下来修改，用子查询和表连接进行。

insert into popular_cateitems
SELECT t1.category_id, t1.item_id, t1.品类商品浏览量
FROM (
    SELECT category_id, item_id,
    COUNT(IF(behavior_type = 'pv', behavior_type, NULL)) AS 品类商品浏览量
    FROM user_behavior
    GROUP BY category_id, item_id
) t1
JOIN (
    SELECT category_id, MAX(品类商品浏览量) AS max_浏览量
    FROM (
        SELECT category_id, item_id,
        COUNT(IF(behavior_type = 'pv', behavior_type, NULL)) AS 品类商品浏览量
        FROM user_behavior
        GROUP BY category_id, item_id
    ) t2
    GROUP BY category_id
) t3
ON t1.category_id = t3.category_id AND t1.品类商品浏览量 = t3.max_浏览量
ORDER BY t1.品类商品浏览量 DESC
LIMIT 10;

首先在子查询中计算每个品类的商品浏览量。在外部查询中，找到每个品类浏览量最大的商品，并为每个品类找到浏览量的最大值。最后，我们按照浏览量降序排列，并限制结果为前10条。

即t1统计了每个类目下每个商品的浏览量，t2

t2选择了每个品类中的最大浏览量

使用join on 内连接保留每个类目最大的品类、商品、浏览量

10 商品转化率分析

商品转化率

-- 商品转化率
create table item_detail(
item_id int,
pv int,
fav int,
cart int,
buy int,
user_buy_rate float);

insert into item_detail
select item_id
,SUM(CASE WHEN behavior_type = 'pv' THEN 1 ELSE 0 END) 'pv' 
,SUM(CASE WHEN behavior_type = 'fav' THEN 1 ELSE 0 END) 'fav' 
,SUM(CASE WHEN behavior_type = 'cart' THEN 1 ELSE 0 END) 'cart' 
,SUM(CASE WHEN behavior_type = 'buy' THEN 1 ELSE 0 END) 'buy' 
,SUM(distinct(CASE WHEN behavior_type = 'buy' THEN 1 ELSE 0 END))/count(distinct user_id) 商品转化率 
from user_behavior
group by item_id
order by 商品转化率 desc;

select * from item_detail;

品类转化率

-- 品类转化率 

create table category_detail(
category_id int,
pv int,
fav int,
cart int,
buy int,
user_buy_rate float);

insert into category_detail
select category_id
,count(if(behavior_type='pv',1,null)) 'pv'
,count(if(behavior_type='fav',1,null)) 'fav'
,count(if(behavior_type='cart',1,null)) 'cart'
,count(if(behavior_type='buy',1,null)) 'buy'
,count(distinct if(behavior_type='buy',user_id,null))/count(distinct user_id) 品类转化率
from user_behavior
group by category_id
order by 品类转化率 desc;

select * from category_detail;

11 商品特征分析

1.点击与购买的关系

矩阵分析，相关分析，分层分析

（记得取消聚合）

筛选器：剔除购买量为0的

分层分析

添加参考线（平均值）+筛选器

进行点击购买分析

12 数据可视化

使用tableau制作可视化看板，可以取B站搜Tableau相关教程

标签：数据分析,category,name,--,int,behavior,MySQL,type,id
From： https://blog.csdn.net/Faracross/article/details/139609870