选题背景介绍
商业和住宅建筑在内的建筑业的全球能源消耗约为20%。随着人口的快速增长和经济增长,预计从1年到3年,建筑物的能源消耗将以每年2018.2050%的速度增长;这种不断增长的能源需求引起了全世界对其对环境负面影响的极大关注。为了满足不断增长的电力需求,需要高效且具有成本效益的运营
选题意义
能源规划:通过对居民用电量的分析,可以帮助政府和能源部门更好地规划和管理能源供应。了解不同地区、不同季节的用电高峰和低谷,可以有针对性地进行能源调配和供给。节能减排:通过对居民用电量的分析,可以发现哪些地区或群体的能源利用效率较低,从而有针对性地开展节能宣传和政策支持,促进节能减排工作的开展。社会经济发展:居民用电量的增长可以反映出当地经济发展水平和生活水平的提高,这对于评估当地的社会经济状况和发展趋势具有重要意义。城市规划:通过对不同城市、地区居民用电量的分析,可以帮助城市规划者更好地规划城市建设和基础设施建设,以适应未来的能源需求。安全稳定:监测居民用电量可以帮助预测电力系统的负荷,有助于确保电网的安全稳定运行,避免因用电高峰而导致的电力供应紧张或故障。
数据集简介
本数据集来源于kaggle,包含以下数据
月份:数据记录的月份总用电量(overall):该月新加坡的总用电量公共住房(public housing):包括1至5房间公共住房的用电量私人住宅(private housing):包括私人公寓和独立住宅的用电量其他类别(others):可能指一些特殊类型的建筑或设施的用电量
大数据分析实验
数据清洗
将原本不易于查看的英文月份转化为数字
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.pyplot import figure
housing_df = pd.read_csv('total_household_electricity_consumption_2005_2020.csv')
def convertfloat(x):
x['overall'] = x['overall'].astype(float)
x['public_housing'] = x['public_housing'].astype(float)
x['1-room_2-room'] = x['1-room_2-room'].astype(float)
x['3-room'] = x['3-room'].astype(float)
x['4-room'] = x['4-room'].astype(float)
x['5-room_and_executive'] = x['5-room_and_executive'].astype(float)
x['private_housing'] = x['private_housing'].astype(float)
x['private_apts_and_condo'] = x['private_apts_and_condo'].astype(float)
x['landed_properties'] = x['landed_properties'].astype(float)
x['others'] = x['others'].astype(float)
return x
housing_df = housing_df.sort_values(by = ['month'], ascending = True)
housing_df = convertfloat(housing_df)
housing_df = housing_df.rename(columns = {'public_housing': 'public_housing_total', 'private_housing': 'private_housing_total'})
housing_df_month = housing_df.copy()
housing_df_dwelling_type = housing_df.copy()
def rename(x):
if '2019' in x:
x = '2019'
elif '2018' in x:
x = '2018'
elif '2017' in x:
x = '2017'
elif '2016' in x:
x = '2016'
elif '2015' in x:
x = '2015'
return x
housing_df['month'] = housing_df['month'].apply(rename)
housing_df = housing_df.rename(columns = {'month': 'year'})
housing_df_2015 = housing_df[housing_df['year'] == '2015']
housing_df_2016 = housing_df[housing_df['year'] == '2016']
housing_df_2017 = housing_df[housing_df['year'] == '2017']
housing_df_2018 = housing_df[housing_df['year'] == '2018']
housing_df_2019 = housing_df[housing_df['year'] == '2019']
housing_lists = [housing_df_2015, housing_df_2016, housing_df_2017, housing_df_2018, housing_df_2019]
y_years = ['2015', '2016', '2017', '2018', '2019']
x_yearly_total = []
for year in housing_lists:
total = round(year['overall'].sum(), 1)
x_yearly_total.append(total)
print(housing_df.tail(15))
每年的用电量使用情况折线图
housing_lists = [housing_df_2015, housing_df_2016, housing_df_2017, housing_df_2018, housing_df_2019]
y_years = ['2015', '2016', '2017', '2018', '2019']
x_yearly_total = []
for year in housing_lists:
total = round(year['overall'].sum(), 1)
x_yearly_total.append(total)
plt.plot(y_years, x_yearly_total, marker='o', linestyle='solid')
plt.title('Total Annual Household Electricity Consumption')
plt.xlabel('Year')
plt.ylabel('Energy Consumption (in GWh)')
plt.show()
分析断路器是如何影响能源消耗的
用一个函数过滤了相关年份
y_month = []
if '2019' in x or '2020' in x:
else:
housing_df_month['month'] = housing_df_month['month'].apply(retrieve_2019_2020)
housing_df_month = housing_df_month.dropna()
for value in housing_df_month['overall']:
plt.figure(figsize=(20,10))
plt.plot(y_month, x_total, marker = 'o', linestyle = 'solid')
plt.title('Monthly Annual Electricity Consumption from Jan 2019 to Jun 2020')
plt.ylabel('Energy Consumption (in GWh)')
按住宅类型划分的月用电量
housing_df_dwelling_type['month'] = housing_df_dwelling_type['month'].apply(retrieve_years)
housing_df_dwelling_type = housing_df_dwelling_type.dropna()
one_two_rooms = []
three_rooms = []
four_rooms = []
five_rooms = []
private_apts = []
landed_properties = []
for value in housing_df_dwelling_type['1-room_2-room']:
one_two_rooms.append(value)
for value in housing_df_dwelling_type['3-room']:
three_rooms.append(value)
for value in housing_df_dwelling_type['4-room']:
four_rooms.append(value)
for value in housing_df_dwelling_type['5-room_and_executive']:
five_rooms.append(value)
for value in housing_df_dwelling_type['private_apts_and_condo']:
private_apts.append(value)
for value in housing_df_dwelling_type['landed_properties']:
landed_properties.append(value)
plt.figure(figsize=(27,10))
plt.plot(years, one_two_rooms, marker = 'o', linestyle = 'solid', label = '1 to 2 rooms')
plt.plot(years, three_rooms, marker = 'o', linestyle = 'solid', label = '3-rooms')
plt.plot(years, four_rooms, marker = 'o', linestyle = 'solid', label = '4-rooms')
plt.plot(years, five_rooms, marker = 'o', linestyle = 'solid', label = '5-rooms')
plt.plot(years, private_apts, marker = 'o', linestyle = 'solid', label = 'private apts')
plt.plot(years, landed_properties, marker = 'o', linestyle = 'solid', label = 'landed apts')
plt.title('Monthly Electricity Consumption from Jan 2018 to Jun 2020 Across Various Dwelling Types')
plt.xlabel('Year/ Month')
plt.ylabel('Energy Consumption (in GWh)')
plt.legend()
plt.show()
# 绘制2019年至2020年各类住宅类型用电量的堆叠柱状图
df = pd.DataFrame({'1 to 2 rooms': one_two_rooms,
'3-rooms': three_rooms,
'4-rooms': four_rooms,
'5-rooms': five_rooms,
'private apts': private_apts,
'landed apts': landed_properties}, index=years)
ax = df.plot.bar(stacked=True, figsize=(20, 10))
ax.set_xlabel("Year/Month")
ax.set_ylabel("Energy Consumption (in GWh)")
plt.title('Monthly Electricity Consumption from Jan 2018 to Jun 2020 Across Various Dwelling Types')
plt.show()
绘制2018年至2020年各类住宅类型用电量的热力图
import seaborn as sns
# housing_df_dwelling_type = housing_df_dwelling_type.drop(['Year'], axis=1)
housing_df_dwelling_type = housing_df_dwelling_type.groupby(by='month').sum()
housing_df_dwelling_type = housing_df_dwelling_type.T
housing_df_dwelling_type = housing_df_dwelling_type.reset_index()
housing_df_dwelling_type = housing_df_dwelling_type.rename(columns={'index': 'Dwelling Type'})
housing_df_dwelling_type = pd.melt(housing_df_dwelling_type, id_vars=['Dwelling Type'], var_name='Month',
value_name='Energy Consumption')
housing_df_dwelling_type['Year'] = housing_df_dwelling_type.apply(lambda row: row['Month'][:4], axis=1)
plt.figure(figsize=(20, 10))
sns.heatmap(housing_df_dwelling_type.pivot_table(values='Energy Consumption', index='Dwelling Type',
columns=['Year', 'Month']), cmap='YlGnBu')
plt.title('Monthly Electricity Consumption from Jan 2018 to Jun 2020 Across Various Dwelling Types')
plt.show()
总结
根据分析每年的用电量使用情况折线图,我们可以看出,随着时间的推移,能源消耗的总量有所增加,但波动较大。这主要是由于季节性需求变化、气候影响和人口增长等因素导致的。然而,当我们进一步分析住宅类型划分的月用电量时,我们发现断路器是一个非常重要的因素,对能源消耗有明显的影响。首先,断路器的容量大小会对用电量产生直接影响。在拥有相同数量电器设备的情况下,断路器容量较小的住宅每月用电量更少。这是因为断路器容量较小的住宅需要更多的限制和控制,防止电器设备超出其容纳范围而触发断路器跳闸。这种限制和控制会促使居民更加节能和环保意识,从而减少用电量和能源消耗。其次,断路器的状态和维护也会影响用电量。断路器的故障或未及时维护会导致电力系统的不稳定和能源浪费。例如,断路器失效或触发跳闸可能会导致电器设备长时间处于开启状态,从而造成不必要的能源消耗。因此,定期检查和维护断路器可以保持电力系统的稳定和正常运行,降低用电量和能源消耗。
源码:
- import pandas as pd
- import matplotlib.pyplot as plt
- import numpy as np
- from matplotlib.pyplot import figure
- # 读取电力消费数据
- housing_df = pd.read_csv('electricity_consumption.csv')
- # 将字符串类型的数据转换为浮点型
- def convertfloat(x):
- x['overall'] = x['overall'].astype(float)
- x['public_housing'] = x['public_housing'].astype(float)
- x['1-room_2-room'] = x['1-room_2-room'].astype(float)
- x['3-room'] = x['3-room'].astype(float)
- x['4-room'] = x['4-room'].astype(float)
- x['5-room_and_executive'] = x['5-room_and_executive'].astype(float)
- x['private_housing'] = x['private_housing'].astype(float)
- x['private_apts_and_condo'] = x['private_apts_and_condo'].astype(float)
- x['landed_properties'] = x['landed_properties'].astype(float)
- x['others'] = x['others'].astype(float)
- return x
- # 对数据进行排序和转换
- housing_df = housing_df.sort_values(by = ['month'], ascending = True)
- housing_df = convertfloat(housing_df)
- housing_df = housing_df.rename(columns = {'public_housing': 'public_housing_total', 'private_housing': 'private_housing_total'})
- housing_df_month = housing_df.copy()
- housing_df_dwelling_type = housing_df.copy()
- # 将年份进行重命名
- def rename(x):
- if '2019' in x:
- x = '2019'
- elif '2018' in x:
- x = '2018'
- elif '2017' in x:
- x = '2017'
- elif '2016' in x:
- x = '2016'
- elif '2015' in x:
- x = '2015'
- return x
- # 按年份提取数据
- housing_df['month'] = housing_df['month'].apply(rename)
- housing_df = housing_df.rename(columns = {'month': 'year'})
- housing_df_2015 = housing_df[housing_df['year'] == '2015']
- housing_df_2016 = housing_df[housing_df['year'] == '2016']
- housing_df_2017 = housing_df[housing_df['year'] == '2017']
- housing_df_2018 = housing_df[housing_df['year'] == '2018']
- housing_df_2019 = housing_df[housing_df['year'] == '2019']
- # 计算每年的总电力消耗量
- housing_lists = [housing_df_2015, housing_df_2016, housing_df_2017, housing_df_2018, housing_df_2019]
- y_years = ['2015', '2016', '2017', '2018', '2019']
- x_yearly_total = []
- for year in housing_lists:
- total = round(year['overall'].sum(), 1)
- x_yearly_total.append(total)
- print(housing_df.tail(15))
- # 绘制总年度家庭用电量折线图
- housing_lists = [housing_df_2015, housing_df_2016, housing_df_2017, housing_df_2018, housing_df_2019]
- y_years = ['2015', '2016', '2017', '2018', '2019']
- x_yearly_total = []
- for year in housing_lists:
- total = round(year['overall'].sum(), 1)
- x_yearly_total.append(total)
- #使用matplotlib库来绘制一个图表,并定义了一个名为retrieve_2019_2020的函数
- plt.plot(y_years, x_yearly_total, marker='o', linestyle='solid')
- plt.title('Total Annual Household Electricity Consumption')
- plt.xlabel('Year')
- plt.ylabel('Energy Consumption (in GWh)')
- plt.show()
- y_month = []
- x_total = []
- def retrieve_2019_2020(x):
- if '2019' in x or '2020' in x:
- y_month.append(x)
- return x
- else:
- x = 0
- #使用Pandas和matplotlib库来处理和可视化数据
- housing_df_month['month'] = housing_df_month['month'].apply(retrieve_2019_2020)
- housing_df_month = housing_df_month.dropna()
- for value in housing_df_month['overall']:
- x_total.append(value)
- plt.figure(figsize=(20,10))
- # 绘制2019年至2020年6月的每月家庭用电量折线图
- plt.plot(y_month, x_total, marker = 'o', linestyle = 'solid')
- plt.title('Monthly Annual Electricity Consumption from Jan 2019 to Jun 2020')
- plt.xlabel('Year')
- plt.ylabel('Energy Consumption (in GWh)')
- plt.show()
- housing_df_dwelling_type = housing_df_dwelling_type.drop(['overall', 'public_housing_total', 'private_housing_total'],
- axis=1)
- years = []
- # 提取2018年至2020年的数据
- def retrieve_years(x):
- if '2018' in x or '2019' in x or '2020' in x:
- years.append(x)
- return x
- else:
- x = 0
- housing_df_dwelling_type['month'] = housing_df_dwelling_type['month'].apply(retrieve_years)
- housing_df_dwelling_type = housing_df_dwelling_type.dropna()
- one_two_rooms = []
- three_rooms = []
- four_rooms = []
- five_rooms = []
- private_apts = []
- landed_properties = []
- # 提取各类住宅类型的用电量数据
- for value in housing_df_dwelling_type['1-room_2-room']:
- one_two_rooms.append(value)
- for value in housing_df_dwelling_type['3-room']:
- three_rooms.append(value)
- for value in housing_df_dwelling_type['4-room']:
- four_rooms.append(value)
- for value in housing_df_dwelling_type['5-room_and_executive']:
- five_rooms.append(value)
- for value in housing_df_dwelling_type['private_apts_and_condo']:
- private_apts.append(value)
- for value in housing_df_dwelling_type['landed_properties']:
- landed_properties.append(value)
- plt.figure(figsize=(27,10))
- # 绘制2018年至2020年各类住宅类型的用电量折线图
- plt.plot(years, one_two_rooms, marker = 'o', linestyle = 'solid', label = '1 to 2 rooms')
- plt.plot(years, three_rooms, marker = 'o', linestyle = 'solid', label = '3-rooms')
- plt.plot(years, four_rooms, marker = 'o', linestyle = 'solid', label = '4-rooms')
- plt.plot(years, five_rooms, marker = 'o', linestyle = 'solid', label = '5-rooms')
- plt.plot(years, private_apts, marker = 'o', linestyle = 'solid', label = 'private apts')
- plt.plot(years, landed_properties, marker = 'o', linestyle = 'solid', label = 'landed apts')
- plt.title('Monthly Electricity Consumption from Jan 2018 to Jun 2020 Across Various Dwelling Types')
- plt.xlabel('Year/ Month')
- plt.ylabel('Energy Consumption (in GWh)')
- plt.legend()
- plt.show()
- # 绘制2019年至2020年各类住宅类型用电量的堆叠柱状图
- df = pd.DataFrame({'1 to 2 rooms': one_two_rooms,
- '3-rooms': three_rooms,
- '4-rooms': four_rooms,
- '5-rooms': five_rooms,
- 'private apts': private_apts,
- 'landed apts': landed_properties}, index=years)
- ax = df.plot.bar(stacked=True, figsize=(20, 10))
- ax.set_xlabel("Year/Month")
- ax.set_ylabel("Energy Consumption (in GWh)")
- plt.title('Monthly Electricity Consumption from Jan 2018 to Jun 2020 Across Various Dwelling Types')
- plt.show()
- # 绘制2018年至2020年各类住宅类型用电量的热力图
- import seaborn as sns
- # housing_df_dwelling_type = housing_df_dwelling_type.drop(['Year'], axis=1)
- housing_df_dwelling_type = housing_df_dwelling_type.groupby(by='month').sum()
- housing_df_dwelling_type = housing_df_dwelling_type.T
- housing_df_dwelling_type = housing_df_dwelling_type.reset_index()
- housing_df_dwelling_type = housing_df_dwelling_type.rename(columns={'index': 'Dwelling Type'})
- housing_df_dwelling_type = pd.melt(housing_df_dwelling_type, id_vars=['Dwelling Type'], var_name='Month',
- value_name='Energy Consumption')
- housing_df_dwelling_type['Year'] = housing_df_dwelling_type.apply(lambda row: row['Month'][:4], axis=1)
- #展示了从2018年1月到2020年6月不同类型住宅的每月电力消耗。
- plt.figure(figsize=(20, 10))
- sns.heatmap(housing_df_dwelling_type.pivot_table(values='Energy Consumption', index='Dwelling Type',
- columns=['Year', 'Month']), cmap='YlGnBu')
- plt.title('Monthly Electricity Consumption from Jan 2018 to Jun 2020 Across Various Dwelling Types')
- plt.show()
- # 创建2019年各类住宅类型用电量占比的饼状图
- plt.figure(figsize=(10, 5))
- labels = ['1 to 2 rooms', '3-rooms', '4-rooms', '5-rooms', 'private apts', 'landed apts']
- sizes_2019 = [sum(one_two_rooms[:12]), sum(three_rooms[:12]), sum(four_rooms[:12]), sum(five_rooms[:12]), sum(private_apts[:12]), sum(landed_properties[:12])]
- plt.pie(sizes_2019, labels=labels, autopct='%1.1f%%')
- plt.title('2019 Electricity Consumption by Dwelling Type')
- plt.show()
- # 创建2020年6月各类住宅类型用电量占比的饼状图
- plt.figure(figsize=(10, 5))
- sizes_2020_06 = [one_two_rooms[-1], three_rooms[-1], four_rooms[-1], five_rooms[-1], private_apts[-1], landed_properties[-1]]
- plt.pie(sizes_2020_06, labels=labels, autopct='%1.1f%%')
- plt.title('2020 June Electricity Consumption by Dwelling Type')
- plt.show()
- # 创建2019年各类住宅类型用电量总量的柱形图
- plt.figure(figsize=(10, 5))
- plt.bar(labels, [sum(one_two_rooms[:12]), sum(three_rooms[:12]), sum(four_rooms[:12]), sum(five_rooms[:12]), sum(private_apts[:12]), sum(landed_properties[:12])])
- plt.title('2019 Electricity Consumption by Dwelling Type')
- plt.xlabel('Dwelling Type')
- plt.ylabel('Energy Consumption (in GWh)')
- plt.show()
- # 创建2020年6月各类住宅类型用电量总量的柱形图
- plt.figure(figsize=(10, 5))
- plt.bar(labels, sizes_2020_06)
- plt.title('2020 June Electricity Consumption by Dwelling Type')
- plt.xlabel('Dwelling Type')
- plt.ylabel('Energy Consumption (in GWh)')
- plt.show()
- #展示从2018年1月到2020年6月不同类型住宅的每月电力消耗。
- df = pd.DataFrame({'1 to 2 rooms': one_two_rooms, '3-rooms': three_rooms, '4-rooms': four_rooms, '5-rooms': five_rooms, 'private apts': private_apts, 'landed apts': landed_properties}, index=years)
- ax = df.plot.bar(stacked=True, figsize=(20, 10))
- ax.set_xlabel("Year/Month")
- ax.set_ylabel("Energy Consumption (in GWh)")
- plt.title('Monthly Electricity Consumption from Jan 2018 to Jun 2020 Across Various Dwelling Types')
- plt.show()
- #使用matplotlib库来绘制两个图表,展示家庭电力消耗情况。
- plt.plot(y_years, x_yearly_total, marker='o', linestyle='solid')
- plt.title('Total Annual Household Electricity Consumption', fontweight='bold', fontsize=14)
- plt.xlabel('Year', fontsize=12)
- plt.ylabel('Energy Consumption (in GWh)', fontsize=12)
- plt.grid(True) # 添加网格线
- plt.show()
- plt.plot(y_month, x_total, marker='o', linestyle='solid')
- plt.title('Monthly Annual Electricity Consumption from Jan 2019 to Jun 2020', fontweight='bold', fontsize=14)
- plt.xlabel('Year', fontsize=12)
- plt.ylabel('Energy Consumption (in GWh)', fontsize=12)
- plt.xticks(rotation=45) # 旋转 x 轴刻度标签
- for i in range(len(y_month)):
- plt.text(y_month[i], x_total[i], str(round(x_total[i], 1)), ha='center', va='bottom') # 添加数据标签
- plt.grid(True) # 添加网格线
- plt.show()
- #显示从2018年1月到2020年6月不同类型住宅的每月电力消耗
- plt.plot(years, one_two_rooms, marker='o', linestyle='-', color='blue', label='1 to 2 rooms')
- plt.plot(years, three_rooms, marker='o', linestyle='--', color='orange', label='3-rooms')
- plt.plot(years, four_rooms, marker='o', linestyle=':', color='green', label='4-rooms')
- plt.plot(years, five_rooms, marker='o', linestyle='-.', color='red', label='5-rooms')
- plt.plot(years, private_apts, marker='o', linestyle='-', color='purple', label='private apts')
- plt.plot(years, landed_properties, marker='o', linestyle='--', color='pink', label='landed apts')
- plt.title('Monthly Electricity Consumption from Jan 2018 to Jun 2020 Across Various Dwelling Types', fontweight='bold', fontsize=14)
- plt.xlabel('Year/Month', fontsize=12)
- plt.ylabel('Energy Consumption (in GWh)', fontsize=12)
- plt.legend(loc='upper left') # 添加图例,位置在左上角
- plt.grid(True) # 添加网格线
- plt.show()