目录
- 题目地址
- MySQL代码
- 等效pandas代码
题目地址
https://leetcode.cn/problems/customers-with-strictly-increasing-purchases/description/
MySQL代码
# Write your MySQL query statement below
with t1 as(
select customer_id, year(order_date) as my_year
, sum(price) as total_spend
from Orders
group by customer_id,my_year
)
,
t2 as(
select
*
# --------------------------只需要改动这里的逻辑,其他不要动。注意里面的语句是“顺序执行的”----------------------
-- 如果customer_id切换了
,(case when @prev_customer_id is null or @prev_customer_id<>customer_id then @reg:=0 end)
,(case when @prev_customer_id is null or @prev_customer_id<>customer_id then @prev_total_spend:=0 end)
,(case when @prev_customer_id is null or @prev_customer_id<>customer_id then @prev_my_year:= my_year - 1 end) -- 当前年的上一年,哨兵
, @prev_my_year as prev_my_year -- 没啥用,纯粹调试用的,看这个用户变量@prev_my_year是否符合预期。
-- 年购买量严格增加的年数(连续年)该任务是否成功,ok不一定成功,failure一定是失败了
,(case when total_spend>@prev_total_spend and @prev_my_year + 1 =my_year then "flag=ok" else "flag=failure" end) as flag
-- 这是维护一个【严格递增连续子序列的长度】
,(case when total_spend>@prev_total_spend and @prev_my_year + 1 =my_year then @reg:=@reg+1 else @reg:=1 end)
,@reg as reg -- 没啥用,纯粹调试用的,看这个用户变量@reg是否符合预期。
-- 准备好处理下一行了,一些@prev_*用户变量更新一下
,@prev_total_spend:=total_spend
,@prev_customer_id:=customer_id
,@prev_my_year:=my_year
# --------------------------只需要改动这里的逻辑,其他不要动。注意里面的语句是“顺序执行的”----------------------
from (select * from t1 order by customer_id asc, my_year asc ) soerted_t1,
(select @reg := 0 -- 某用户年购买量严格增加的年数(连续年)
,@prev_total_spend:=0 ,@prev_customer_id := null, @prev_my_year :=null ) vars
)
select distinct customer_id
from t2
where customer_id not in (select customer_id from t2 where flag="flag=failure")
order by customer_id asc, my_year asc
-- 下面注释掉的代码是用来调试的
# select
# customer_id,my_year,reg,total_spend,prev_my_year,flag
# from t2
# where customer_id=6
# order by customer_id asc, my_year asc
等效pandas代码
import pandas as pd
def find_specific_customers(orders: pd.DataFrame) -> pd.DataFrame:
# First, we mimic the 't1' CTE by grouping and summing the price per customer per year
orders['my_year'] = pd.DatetimeIndex(orders['order_date']).year
t1 = orders.groupby(['customer_id', 'my_year'], as_index=False)['price'].sum().rename(columns={'price': 'total_spend'})
# Sort the DataFrame similarly to the 'soerted_t1' in the query
t1_sorted = t1.sort_values(by=['customer_id', 'my_year'])
# Now we need to perform the logic that's done in 't2' CTE
# We will use the shift operation to compare the current row with the previous one
t1_sorted['prev_customer_id'] = t1_sorted['customer_id'].shift(1)
t1_sorted['prev_my_year'] = t1_sorted['my_year'].shift(1)
t1_sorted['prev_total_spend'] = t1_sorted['total_spend'].shift(1)
# Initialize the reg (registration) to track the strictly increasing sequence length
t1_sorted['reg'] = 1
t1_sorted['flag'] = 'flag=ok'
# Loop through the DataFrame to calculate conditional variables
for i in range(1, len(t1_sorted)):
row = t1_sorted.iloc[i]
prev_row = t1_sorted.iloc[i - 1]
if row['customer_id'] == prev_row['customer_id']:
if row['total_spend'] > prev_row['total_spend'] and row['prev_my_year'] + 1 == row['my_year']:
t1_sorted.at[i, 'reg'] = prev_row['reg'] + 1
else:
t1_sorted.at[i, 'flag'] = 'flag=failure'
t1_sorted.at[i, 'reg'] = 1
# Identify customers with no 'flag=failure'
customers_without_failure = t1_sorted[~t1_sorted['customer_id'].isin(t1_sorted.loc[t1_sorted['flag'] == 'flag=failure', 'customer_id'])]['customer_id'].unique()
# Return a DataFrame with the customer_id's that meet the criteria
result = pd.DataFrame({'customer_id': customers_without_failure})
return result.sort_values('customer_id').reset_index(drop=True)