目录
- 题目地址
- MySQL代码
- 等效pandas代码
题目地址
https://leetcode.cn/problems/merge-overlapping-events-in-the-same-hall/
MySQL代码
# Write your MySQL query statement below
with t2 as(
select
*
# ----只需要改动这里的逻辑,其他不要动。注意里面的语句是“顺序执行的”-----------
-- 如果切换hall_id了,一些变量需要改变
,(case when (@prev_hall_id is null or @prev_hall_id<>hall_id) then @farest_interval_start:=start_day end)
,(case when (@prev_hall_id is null or @prev_hall_id<>hall_id) then @farest_interval_end:=end_day end)
,(case when (@prev_hall_id is null or @prev_hall_id<>hall_id) then @interval_id := @interval_id +1 end)
-- 如果新区间的开始要晚于维护区间的结尾,那么开启新区间
, (case when @farest_interval_end < start_day then @interval_id := @interval_id +1 else @interval_id end) as interval_id
, (case when @farest_interval_end < start_day then @farest_interval_start:=start_day end)
, (case when @farest_interval_end < start_day then @farest_interval_end:=end_day end)
-- 如果新区间的开始不晚于维护区间的结尾,可能会发生区间的合并。由于我们已经按照start_day asc, end_day asc排序了。所以只可能更新@farest_interval_end
,(case when @farest_interval_end < start_day then "whatever" else
(case when @farest_interval_end <= end_day then @farest_interval_end:= end_day end)
end)
, @farest_interval_start as farest_interval_start
, @farest_interval_end as farest_interval_end
, @prev_hall_id := hall_id
# ----只需要改动这里的逻辑,其他不要动。注意里面的语句是“顺序执行的”-----------
from (select * from HallEvents order by hall_id asc, start_day asc, end_day asc) sorted_HallEvents,
(select @prev_hall_id := null, @farest_interval_start := null, @farest_interval_end := null, @interval_id := 0) vars
)
select hall_id, farest_interval_start as start_day
, farest_interval_end as end_day
from
(select *,
row_number() over(partition by interval_id order by start_day desc, end_day desc) as rn
from t2) tmp1
where rn=1
order by hall_id, start_day, end_day
等效pandas代码
import pandas as pd
def merge_events(hall_events: pd.DataFrame) -> pd.DataFrame:
# Sort the DataFrame by hall_id, start_day, and end_day
sorted_events = hall_events.sort_values(by=['hall_id', 'start_day', 'end_day'])
# Initialize the variables
prev_hall_id = None
farest_interval_start = None
farest_interval_end = None
interval_id = 0
intervals = []
# Iterate over the rows of the sorted DataFrame
for row in sorted_events.itertuples(index=False):
# Check if we need to reset the variables because the hall_id has changed
if prev_hall_id is None or prev_hall_id != row.hall_id:
intervals.append((prev_hall_id, farest_interval_start, farest_interval_end, interval_id))
farest_interval_start = row.start_day
farest_interval_end = row.end_day
interval_id += 1
# Check if the new interval starts after the farthest interval end
if row.start_day > farest_interval_end:
# Append the finished interval to the list
intervals.append((prev_hall_id, farest_interval_start, farest_interval_end, interval_id))
# Start a new interval
farest_interval_start = row.start_day
farest_interval_end = row.end_day
interval_id += 1
# This case is when the new interval overlaps with the farthest interval end
elif row.end_day > farest_interval_end:
# Extend the current interval
farest_interval_end = row.end_day
prev_hall_id=row.hall_id
print(f"{row.hall_id} {prev_hall_id} {farest_interval_start} {farest_interval_end} {row.start_day} {row.end_day}")
# Add the last interval to the list if it exists
if farest_interval_start is not None and farest_interval_end is not None:
intervals.append((prev_hall_id, farest_interval_start, farest_interval_end, interval_id))
# Create a DataFrame from the list of intervals
intervals_df = pd.DataFrame(intervals, columns=['hall_id', 'start_day', 'end_day', 'interval_id'])
# Select the last interval for each interval_id
final_intervals_df = intervals_df.sort_values(by=['interval_id', 'start_day', 'end_day'], ascending=[True, False, False])
final_intervals_df = final_intervals_df.drop_duplicates(subset='interval_id', keep='first')
# Sort the final DataFrame and reset the index
final_intervals_df = final_intervals_df.sort_values(by=['hall_id', 'start_day', 'end_day']).reset_index(drop=True)
# Drop the interval_id column as it's not needed in the final output
final_intervals_df = final_intervals_df.drop(columns='interval_id').dropna(subset=['hall_id'])
return final_intervals_df