3.5-io读取与存储
3.5.1 有哪些io方式¶
- 数据分析阶段的重点:分析、建模
3.5.2 读取和存储csv¶
- 存储、读取、索引设置
- 数据追加
3.5.3 读取和存储excel¶
- 存储、读取、工作表设置
- 数据追加
import pandas as pd import numpy as np data = pd.DataFrame(np.random.randn(1000,3),columns=["a",'b','c'],index=pd.date_range('20200101',periods=1000)) dataOut[ ]:
a | b | c | |
---|---|---|---|
2020-01-01 | -0.275344 | -0.216934 | -0.083554 |
2020-01-02 | -0.667199 | -0.722457 | 0.587480 |
2020-01-03 | 0.270504 | -0.363369 | 1.450482 |
2020-01-04 | -0.621467 | 0.067100 | -0.161752 |
2020-01-05 | 0.701313 | 1.162024 | 0.929627 |
... | ... | ... | ... |
2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
1000 rows × 3 columns
In [ ]:# 数据存储 data.to_csv('txt.csv')In [ ]:
pd.read_csv('txt.csv')Out[ ]:
Unnamed: 0 | a | b | c | |
---|---|---|---|---|
0 | 2020-01-01 | -0.275344 | -0.216934 | -0.083554 |
1 | 2020-01-02 | -0.667199 | -0.722457 | 0.587480 |
2 | 2020-01-03 | 0.270504 | -0.363369 | 1.450482 |
3 | 2020-01-04 | -0.621467 | 0.067100 | -0.161752 |
4 | 2020-01-05 | 0.701313 | 1.162024 | 0.929627 |
... | ... | ... | ... | ... |
995 | 2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
996 | 2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
997 | 2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
998 | 2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
999 | 2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
1000 rows × 4 columns
In [ ]:## 上面读取时候索引变了,有两种方法可以变成我们原来要的样子 ## 方法一:读取时候设置索引 pd.read_csv('txt.csv',index_col=['Unnamed: 0'])Out[ ]:
a | b | c | |
---|---|---|---|
2020-01-01 | -0.275344 | -0.216934 | -0.083554 |
2020-01-02 | -0.667199 | -0.722457 | 0.587480 |
2020-01-03 | 0.270504 | -0.363369 | 1.450482 |
2020-01-04 | -0.621467 | 0.067100 | -0.161752 |
2020-01-05 | 0.701313 | 1.162024 | 0.929627 |
... | ... | ... | ... |
2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
1000 rows × 3 columns
In [ ]:## 方法二:存储时对数据索引进行命名: date data.indexOut[ ]:
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10', ... '2022-09-17', '2022-09-18', '2022-09-19', '2022-09-20', '2022-09-21', '2022-09-22', '2022-09-23', '2022-09-24', '2022-09-25', '2022-09-26'], dtype='datetime64[ns]', length=1000, freq='D')In [ ]:
data.index.names = ['date']In [ ]:
dataOut[ ]:
a | b | c | |
---|---|---|---|
date | |||
2020-01-01 | -0.275344 | -0.216934 | -0.083554 |
2020-01-02 | -0.667199 | -0.722457 | 0.587480 |
2020-01-03 | 0.270504 | -0.363369 | 1.450482 |
2020-01-04 | -0.621467 | 0.067100 | -0.161752 |
2020-01-05 | 0.701313 | 1.162024 | 0.929627 |
... | ... | ... | ... |
2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
1000 rows × 3 columns
In [ ]:data.to_csv('txt.csv') # 完全覆盖/替换In [ ]:
# 对已有文件进行数据追加 data2=data.tail() data2Out[ ]:
a | b | c | |
---|---|---|---|
date | |||
2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
data2.to_csv('txt.csv',mode='a',header=False) # a追加模式且不写入列名In [ ]:
# excel的存储和读取 filename = 'excel.xlsx' data.to_excel(filename,sheet_name='a')In [ ]:
pd.read_excel(filename)Out[ ]:
date | a | b | c | |
---|---|---|---|---|
0 | 2020-01-01 | -0.275344 | -0.216934 | -0.083554 |
1 | 2020-01-02 | -0.667199 | -0.722457 | 0.587480 |
2 | 2020-01-03 | 0.270504 | -0.363369 | 1.450482 |
3 | 2020-01-04 | -0.621467 | 0.067100 | -0.161752 |
4 | 2020-01-05 | 0.701313 | 1.162024 | 0.929627 |
... | ... | ... | ... | ... |
995 | 2022-09-22 | 0.264954 | 0.972600 | 0.249330 |
996 | 2022-09-23 | 0.193843 | 0.535633 | 0.796537 |
997 | 2022-09-24 | 0.209620 | -0.995445 | -0.202398 |
998 | 2022-09-25 | -1.751889 | 0.253133 | 0.573625 |
999 | 2022-09-26 | -2.156979 | 1.351400 | 0.036575 |
1000 rows × 4 columns
In [ ]:# 一次写入多个sheet with pd.ExcelWriter('writer.xlsx') as writer: data.to_excel(writer,sheet_name='a') data.to_excel(writer,sheet_name='b') data.to_excel(writer,sheet_name='c')In [ ]:
# 追加新sheet with pd.ExcelWriter('writer.xlsx',mode='a',engine='openpyxl') as writer: data2.to_excel(writer,sheet_name="d")In [ ]: 标签:...,01,读取,io,09,3.5,2020,2022,data From: https://www.cnblogs.com/mlzxdzl/p/17772473.html