season | month |
1 | 1 |
1 | 2 |
1 | 3 |
2 | 4 |
2 | 5 |
2 | 6 |
3 | 7 |
3 | 8 |
3 | 9 |
4 | 10 |
4 | 11 |
4 | 12 |
1-4 代表4个季节;1-12代表12个月。
import pandas as pd
data_path = 'test_dada.csv' # 读取数据到内存
data = pd.read_csv(data_path)
dummy_fields = ['season', 'month'] # 所有类型编码变量的名称
for each in dummy_fields:
# 取出所有类型变量,并将它们转变为独热编码
dummies = pd.get_dummies(data[each], prefix=each, drop_first=False)
# 将新的独热编码变量与原有的所有变量合并到一起
data = pd.concat([data, dummies], axis=1)
# 将原来的类型变量从数据表中删除
fields_to_drop = ['season', 'month'] # 要删除的类型变量的名称
data = data.drop(fields_to_drop, axis=1) # 将它们从数据库的变量中删除
代码运行后,生成
season_1 | season_2 | season_3 | season_4 | month_1 | month_2 | month_3 | month_4 | month_5 | month_6 | month_7 | month_8 | month_9 | month_10 | month_11 | month_12 |
1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |