标签:... False NaN 第一节 第一章 载入 female Mr male
import numpy as np
import pandas as pd
import os
df=pd.read_csv('train.csv')  # load the training set from train.csv using a relative path
df.shape  # dimensions of the frame as (rows, columns)
(891, 12)
df.T  # transpose: swap rows and columns
|
0 |
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
... |
881 |
882 |
883 |
884 |
885 |
886 |
887 |
888 |
889 |
890 |
PassengerId |
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
... |
882 |
883 |
884 |
885 |
886 |
887 |
888 |
889 |
890 |
891 |
Survived |
0 |
1 |
1 |
1 |
0 |
0 |
0 |
0 |
1 |
1 |
... |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
1 |
0 |
Pclass |
3 |
1 |
3 |
1 |
3 |
3 |
1 |
3 |
3 |
2 |
... |
3 |
3 |
2 |
3 |
3 |
2 |
1 |
3 |
1 |
3 |
Name |
Braund, Mr. Owen Harris |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
Heikkinen, Miss. Laina |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
Allen, Mr. William Henry |
Moran, Mr. James |
McCarthy, Mr. Timothy J |
Palsson, Master. Gosta Leonard |
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) |
Nasser, Mrs. Nicholas (Adele Achem) |
... |
Markun, Mr. Johann |
Dahlberg, Miss. Gerda Ulrika |
Banfield, Mr. Frederick James |
Sutehall, Mr. Henry Jr |
Rice, Mrs. William (Margaret Norton) |
Montvila, Rev. Juozas |
Graham, Miss. Margaret Edith |
Johnston, Miss. Catherine Helen "Carrie" |
Behr, Mr. Karl Howell |
Dooley, Mr. Patrick |
Sex |
male |
female |
female |
female |
male |
male |
male |
male |
female |
female |
... |
male |
female |
male |
male |
female |
male |
female |
female |
male |
male |
Age |
22.0 |
38.0 |
26.0 |
35.0 |
35.0 |
NaN |
54.0 |
2.0 |
27.0 |
14.0 |
... |
33.0 |
22.0 |
28.0 |
25.0 |
39.0 |
27.0 |
19.0 |
NaN |
26.0 |
32.0 |
SibSp |
1 |
1 |
0 |
1 |
0 |
0 |
0 |
3 |
0 |
1 |
... |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
0 |
0 |
Parch |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
1 |
2 |
0 |
... |
0 |
0 |
0 |
0 |
5 |
0 |
0 |
2 |
0 |
0 |
Ticket |
A/5 21171 |
PC 17599 |
STON/O2. 3101282 |
113803 |
373450 |
330877 |
17463 |
349909 |
347742 |
237736 |
... |
349257 |
7552 |
C.A./SOTON 34068 |
SOTON/OQ 392076 |
382652 |
211536 |
112053 |
W./C. 6607 |
111369 |
370376 |
Fare |
7.25 |
71.2833 |
7.925 |
53.1 |
8.05 |
8.4583 |
51.8625 |
21.075 |
11.1333 |
30.0708 |
... |
7.8958 |
10.5167 |
10.5 |
7.05 |
29.125 |
13.0 |
30.0 |
23.45 |
30.0 |
7.75 |
Cabin |
NaN |
C85 |
NaN |
C123 |
NaN |
NaN |
E46 |
NaN |
NaN |
NaN |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
B42 |
NaN |
C148 |
NaN |
Embarked |
S |
C |
S |
S |
S |
Q |
S |
S |
S |
C |
... |
S |
S |
S |
S |
Q |
S |
S |
S |
C |
Q |
12 rows × 891 columns
path=os.path.abspath('train.csv')  # build the absolute path of train.csv
pd.read_csv(path)  # load the same file through its absolute path
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
pd.read_table(path)  # read_table splits on tabs by default, so every comma-separated line ends up in a single column
|
PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked |
0 |
1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/... |
1 |
2,1,1,"Cumings, Mrs. John Bradley (Florence Br... |
2 |
3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,S... |
3 |
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May ... |
4 |
5,0,3,"Allen, Mr. William Henry",male,35,0,0,3... |
... |
... |
886 |
887,0,2,"Montvila, Rev. Juozas",male,27,0,0,21... |
887 |
888,1,1,"Graham, Miss. Margaret Edith",female,... |
888 |
889,0,3,"Johnston, Miss. Catherine Helen ""Car... |
889 |
890,1,1,"Behr, Mr. Karl Howell",male,26,0,0,11... |
890 |
891,0,3,"Dooley, Mr. Patrick",male,32,0,0,3703... |
891 rows × 1 columns
pd.read_table(path,sep=',')  # override the separator with a comma so the fields are split into columns
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
# chunksize=1000 makes read_csv return an iterator (TextFileReader) that yields
# DataFrames of at most 1000 rows each instead of loading the whole file at once.
df=pd.read_csv('train.csv',chunksize=1000)
for chunk in df:  # walk the file chunk by chunk; this consumes the reader
    print(chunk)
PassengerId Survived Pclass \
0 1 0 3
1 2 1 1
2 3 1 3
3 4 1 1
4 5 0 3
.. ... ... ...
886 887 0 2
887 888 1 1
888 889 0 3
889 890 1 1
890 891 0 3
Name Sex Age SibSp \
0 Braund, Mr. Owen Harris male 22.0 1
1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1
2 Heikkinen, Miss. Laina female 26.0 0
3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1
4 Allen, Mr. William Henry male 35.0 0
.. ... ... ... ...
886 Montvila, Rev. Juozas male 27.0 0
887 Graham, Miss. Margaret Edith female 19.0 0
888 Johnston, Miss. Catherine Helen "Carrie" female NaN 1
889 Behr, Mr. Karl Howell male 26.0 0
890 Dooley, Mr. Patrick male 32.0 0
Parch Ticket Fare Cabin Embarked
0 0 A/5 21171 7.2500 NaN S
1 0 PC 17599 71.2833 C85 C
2 0 STON/O2. 3101282 7.9250 NaN S
3 0 113803 53.1000 C123 S
4 0 373450 8.0500 NaN S
.. ... ... ... ... ...
886 0 211536 13.0000 NaN S
887 0 112053 30.0000 B42 S
888 2 W./C. 6607 23.4500 NaN S
889 0 111369 30.0000 C148 C
890 0 370376 7.7500 NaN Q
[891 rows x 12 columns]
# Fetch a chunk explicitly (method 2). A chunked reader can only be consumed
# once, so reopen the file before calling get_chunk(); calling it on a reader
# that a for-loop has already exhausted raises StopIteration.
df=pd.read_csv('train.csv',chunksize=1000)
df.get_chunk()  # next chunk (up to chunksize rows) as a DataFrame
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
df=pd.read_csv('train.csv')
df.columns=['乘客ID','是否幸存','乘客等级(1/2/3等舱位)','乘客姓名','性别','年龄','堂兄弟/妹个数','父母与小孩个数','船票信息','票价','客舱','登船港口']
# Replace the column labels with Chinese names by assigning to df.columns directly.
# NOTE(review): the original note also says the index becomes the passenger ID, but no index is set here.
# NOTE(review): the label chosen for SibSp reads as "number of cousins"; SibSp presumably counts siblings/spouses - confirm.
df
|
乘客ID |
是否幸存 |
乘客等级(1/2/3等舱位) |
乘客姓名 |
性别 |
年龄 |
堂兄弟/妹个数 |
父母与小孩个数 |
船票信息 |
票价 |
客舱 |
登船港口 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
# Rename the header to Chinese while reading. header=0 tells read_csv that the
# first line of the file is the existing header and must be replaced by `names`;
# without it the English header line is parsed as an ordinary data row, giving
# 892 rows and forcing every column to dtype object.
df=pd.read_csv('train.csv',names=['乘客ID','是否幸存','乘客等级(1/2/3等舱位)','乘客姓名','性别','年龄','堂兄弟/妹个数','父母与小孩个数','船票信息','票价','客舱','登船港口'],header=0)
df
|
乘客ID |
是否幸存 |
乘客等级(1/2/3等舱位) |
乘客姓名 |
性别 |
年龄 |
堂兄弟/妹个数 |
父母与小孩个数 |
船票信息 |
票价 |
客舱 |
登船港口 |
0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
1 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22 |
1 |
0 |
A/5 21171 |
7.25 |
NaN |
S |
2 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
3 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26 |
0 |
0 |
STON/O2. 3101282 |
7.925 |
NaN |
S |
4 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35 |
1 |
0 |
113803 |
53.1 |
C123 |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
887 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27 |
0 |
0 |
211536 |
13 |
NaN |
S |
888 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19 |
0 |
0 |
112053 |
30 |
B42 |
S |
889 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.45 |
NaN |
S |
890 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26 |
0 |
0 |
111369 |
30 |
C148 |
C |
891 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32 |
0 |
0 |
370376 |
7.75 |
NaN |
Q |
892 rows × 12 columns
df.info()  # print a summary of the frame: index range, columns, non-null counts and dtypes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 892 entries, 0 to 891
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 乘客ID 892 non-null object
1 是否幸存 892 non-null object
2 乘客等级(1/2/3等舱位) 892 non-null object
3 乘客姓名 892 non-null object
4 性别 892 non-null object
5 年龄 715 non-null object
6 堂兄弟/妹个数 892 non-null object
7 父母与小孩个数 892 non-null object
8 船票信息 892 non-null object
9 票价 892 non-null object
10 客舱 205 non-null object
11 登船港口 890 non-null object
dtypes: object(12)
memory usage: 83.8+ KB
df.describe()  # summary statistics for each column
|
乘客ID |
是否幸存 |
乘客等级(1/2/3等舱位) |
乘客姓名 |
性别 |
年龄 |
堂兄弟/妹个数 |
父母与小孩个数 |
船票信息 |
票价 |
客舱 |
登船港口 |
count |
892 |
892 |
892 |
892 |
892 |
715 |
892 |
892 |
892 |
892 |
205 |
890 |
unique |
892 |
3 |
4 |
892 |
3 |
89 |
8 |
8 |
682 |
249 |
148 |
4 |
top |
PassengerId |
0 |
3 |
Name |
male |
24 |
0 |
0 |
1601 |
8.05 |
C23 C25 C27 |
S |
freq |
1 |
549 |
491 |
1 |
577 |
30 |
608 |
678 |
7 |
43 |
4 |
644 |
df.head(15)  # first 15 rows
|
乘客ID |
是否幸存 |
乘客等级(1/2/3等舱位) |
乘客姓名 |
性别 |
年龄 |
堂兄弟/妹个数 |
父母与小孩个数 |
船票信息 |
票价 |
客舱 |
登船港口 |
0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
1 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22 |
1 |
0 |
A/5 21171 |
7.25 |
NaN |
S |
2 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
3 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26 |
0 |
0 |
STON/O2. 3101282 |
7.925 |
NaN |
S |
4 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35 |
1 |
0 |
113803 |
53.1 |
C123 |
S |
5 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35 |
0 |
0 |
373450 |
8.05 |
NaN |
S |
6 |
6 |
0 |
3 |
Moran, Mr. James |
male |
NaN |
0 |
0 |
330877 |
8.4583 |
NaN |
Q |
7 |
7 |
0 |
1 |
McCarthy, Mr. Timothy J |
male |
54 |
0 |
0 |
17463 |
51.8625 |
E46 |
S |
8 |
8 |
0 |
3 |
Palsson, Master. Gosta Leonard |
male |
2 |
3 |
1 |
349909 |
21.075 |
NaN |
S |
9 |
9 |
1 |
3 |
Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) |
female |
27 |
0 |
2 |
347742 |
11.1333 |
NaN |
S |
10 |
10 |
1 |
2 |
Nasser, Mrs. Nicholas (Adele Achem) |
female |
14 |
1 |
0 |
237736 |
30.0708 |
NaN |
C |
11 |
11 |
1 |
3 |
Sandstrom, Miss. Marguerite Rut |
female |
4 |
1 |
1 |
PP 9549 |
16.7 |
G6 |
S |
12 |
12 |
1 |
1 |
Bonnell, Miss. Elizabeth |
female |
58 |
0 |
0 |
113783 |
26.55 |
C103 |
S |
13 |
13 |
0 |
3 |
Saundercock, Mr. William Henry |
male |
20 |
0 |
0 |
A/5. 2151 |
8.05 |
NaN |
S |
14 |
14 |
0 |
3 |
Andersson, Mr. Anders Johan |
male |
39 |
1 |
5 |
347082 |
31.275 |
NaN |
S |
df.tail(15)  # last 15 rows
|
乘客ID |
是否幸存 |
乘客等级(1/2/3等舱位) |
乘客姓名 |
性别 |
年龄 |
堂兄弟/妹个数 |
父母与小孩个数 |
船票信息 |
票价 |
客舱 |
登船港口 |
877 |
877 |
0 |
3 |
Gustafsson, Mr. Alfred Ossian |
male |
20 |
0 |
0 |
7534 |
9.8458 |
NaN |
S |
878 |
878 |
0 |
3 |
Petroff, Mr. Nedelio |
male |
19 |
0 |
0 |
349212 |
7.8958 |
NaN |
S |
879 |
879 |
0 |
3 |
Laleff, Mr. Kristo |
male |
NaN |
0 |
0 |
349217 |
7.8958 |
NaN |
S |
880 |
880 |
1 |
1 |
Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) |
female |
56 |
0 |
1 |
11767 |
83.1583 |
C50 |
C |
881 |
881 |
1 |
2 |
Shelley, Mrs. William (Imanita Parrish Hall) |
female |
25 |
0 |
1 |
230433 |
26 |
NaN |
S |
882 |
882 |
0 |
3 |
Markun, Mr. Johann |
male |
33 |
0 |
0 |
349257 |
7.8958 |
NaN |
S |
883 |
883 |
0 |
3 |
Dahlberg, Miss. Gerda Ulrika |
female |
22 |
0 |
0 |
7552 |
10.5167 |
NaN |
S |
884 |
884 |
0 |
2 |
Banfield, Mr. Frederick James |
male |
28 |
0 |
0 |
C.A./SOTON 34068 |
10.5 |
NaN |
S |
885 |
885 |
0 |
3 |
Sutehall, Mr. Henry Jr |
male |
25 |
0 |
0 |
SOTON/OQ 392076 |
7.05 |
NaN |
S |
886 |
886 |
0 |
3 |
Rice, Mrs. William (Margaret Norton) |
female |
39 |
0 |
5 |
382652 |
29.125 |
NaN |
Q |
887 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27 |
0 |
0 |
211536 |
13 |
NaN |
S |
888 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19 |
0 |
0 |
112053 |
30 |
B42 |
S |
889 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.45 |
NaN |
S |
890 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26 |
0 |
0 |
111369 |
30 |
C148 |
C |
891 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32 |
0 |
0 |
370376 |
7.75 |
NaN |
Q |
df.isnull()  # True marks a missing value
|
乘客ID |
是否幸存 |
乘客等级(1/2/3等舱位) |
乘客姓名 |
性别 |
年龄 |
堂兄弟/妹个数 |
父母与小孩个数 |
船票信息 |
票价 |
客舱 |
登船港口 |
0 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
1 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
2 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
3 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
4 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
887 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
888 |
False |
False |
False |
False |
False |
True |
False |
False |
False |
False |
True |
False |
889 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
890 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
891 rows × 12 columns
df.to_csv('train_chinese.csv')  # write the current frame to train_chinese.csv
# One-dimensional Series of five random (standard-normal) values labelled a-e.
s = pd.Series(np.random.randn(5), index=list('abcde'))
s
a -0.557197
b 1.348717
c 0.228413
d 0.356737
e -0.123567
dtype: float64
# One-dimensional Series built from explicit integer values with string labels.
s1 = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))
s1
pandas.core.series.Series
# Instantiate a Series from a dict: keys become the index labels, values the data.
s = pd.Series(dict(zip('abcde', range(1, 6))))
s
a 1
b 2
c 3
d 4
e 5
dtype: int64
# Two-dimensional data: a dict mapping column names to Series that share the labels a-e.
d = {
    name: pd.Series(values, index=list('abcde'))
    for name, values in (('one', [1, 2, 3, 4, 5]), ('two', [6, 7, 8, 9, 10]))
}
d
{'one': a 1
b 2
c 3
d 4
e 5
dtype: int64,
'two': a 6
b 7
c 8
d 9
e 10
dtype: int64}
pd.DataFrame(d)  # build a table from the dict: one column per key, rows aligned on the shared labels
|
one |
two |
a |
1 |
6 |
b |
2 |
7 |
c |
3 |
8 |
d |
4 |
9 |
e |
5 |
10 |
df=pd.read_csv('train.csv')
df.columns  # list the column labels of df
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
dtype='object')
df.Cabin  # view the whole Cabin column via attribute access (method 1)
0 NaN
1 C85
2 NaN
3 C123
4 NaN
...
886 NaN
887 B42
888 NaN
889 C148
890 NaN
Name: Cabin, Length: 891, dtype: object
df['Cabin']  # view the whole Cabin column via bracket indexing (method 2); the result is a Series
0 NaN
1 C85
2 NaN
3 C123
4 NaN
...
886 NaN
887 B42
888 NaN
889 C148
890 NaN
Name: Cabin, Length: 891, dtype: object
df[['Cabin']]  # indexing with a list of labels returns a DataFrame
|
Cabin |
0 |
NaN |
1 |
C85 |
2 |
NaN |
3 |
C123 |
4 |
NaN |
... |
... |
886 |
NaN |
887 |
B42 |
888 |
NaN |
889 |
C148 |
890 |
NaN |
891 rows × 1 columns
test_1=pd.read_csv('test_1.csv')  # load test_1.csv using a relative path
test_1
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
a |
0 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
100 |
1 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
100 |
2 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
100 |
3 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
100 |
4 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
100 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
100 |
887 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
100 |
888 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
100 |
889 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
100 |
890 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
100 |
891 rows × 14 columns
del test_1['a']  # remove column 'a' from test_1 in place (method 1)
test_1
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 13 columns
# Remove column 'a' (method 2): pop() deletes the column in place and returns it
# as a Series. The column was already removed from test_1 by the `del` cell
# above, so reload the file first - otherwise pop('a') raises KeyError.
test_1=pd.read_csv('test_1.csv')
a=test_1.pop('a')
a  # the removed column
0 100
1 100
2 100
3 100
4 100
...
886 100
887 100
888 100
889 100
890 100
Name: a, Length: 891, dtype: int64
# Remove column 'a' (method 3): drop() with axis=1 targets columns (axis=0 targets
# rows) and returns a new DataFrame, leaving test_1 itself unchanged. Reload the
# file first, because the earlier cells already removed 'a' from test_1 and
# drop(['a']) would raise KeyError.
test_1=pd.read_csv('test_1.csv')
test_1.drop(['a'],axis=1)
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 13 columns
test_1.drop(['a'],axis=1,inplace=True)  # inplace=True modifies test_1 itself instead of returning a copy
test_1
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 13 columns
test_1=pd.read_csv('test_1.csv')
test_1
test_1.drop(['a','PassengerId','Survived','Pclass'],axis=1)  # returns a copy without the listed columns
|
Unnamed: 0 |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
0 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
2 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
3 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
4 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
886 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
887 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
888 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
889 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
890 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 10 columns
test_1=pd.read_csv('test_1.csv')
test_1['Age'] < 10  # boolean Series: True where Age is below 10, False otherwise
0 False
1 False
2 False
3 False
4 False
...
886 False
887 False
888 False
889 False
890 False
Name: Age, Length: 891, dtype: bool
test_1[test_1['Age'] < 10]  # keep only the rows where the mask is True
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
a |
7 |
7 |
8 |
0 |
3 |
Palsson, Master. Gosta Leonard |
male |
2.00 |
3 |
1 |
349909 |
21.0750 |
NaN |
S |
100 |
10 |
10 |
11 |
1 |
3 |
Sandstrom, Miss. Marguerite Rut |
female |
4.00 |
1 |
1 |
PP 9549 |
16.7000 |
G6 |
S |
100 |
16 |
16 |
17 |
0 |
3 |
Rice, Master. Eugene |
male |
2.00 |
4 |
1 |
382652 |
29.1250 |
NaN |
Q |
100 |
24 |
24 |
25 |
0 |
3 |
Palsson, Miss. Torborg Danira |
female |
8.00 |
3 |
1 |
349909 |
21.0750 |
NaN |
S |
100 |
43 |
43 |
44 |
1 |
2 |
Laroche, Miss. Simonne Marie Anne Andree |
female |
3.00 |
1 |
2 |
SC/Paris 2123 |
41.5792 |
NaN |
C |
100 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
827 |
827 |
828 |
1 |
2 |
Mallet, Master. Andre |
male |
1.00 |
0 |
2 |
S.C./PARIS 2079 |
37.0042 |
NaN |
C |
100 |
831 |
831 |
832 |
1 |
2 |
Richards, Master. George Sibley |
male |
0.83 |
1 |
1 |
29106 |
18.7500 |
NaN |
S |
100 |
850 |
850 |
851 |
0 |
3 |
Andersson, Master. Sigvard Harald Elias |
male |
4.00 |
4 |
2 |
347082 |
31.2750 |
NaN |
S |
100 |
852 |
852 |
853 |
0 |
3 |
Boulos, Miss. Nourelain |
female |
9.00 |
1 |
1 |
2678 |
15.2458 |
NaN |
C |
100 |
869 |
869 |
870 |
1 |
3 |
Johnson, Master. Harold Theodor |
male |
4.00 |
1 |
1 |
347742 |
11.1333 |
NaN |
S |
100 |
62 rows × 14 columns
midage=test_1[(test_1['Age'] > 10) & (test_1['Age'] < 50)]  # rows whose Age is above 10 and below 50
midage.head()
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
a |
0 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
100 |
1 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
100 |
2 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
100 |
3 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
100 |
4 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
100 |
midage1=test_1[(test_1['Age'] > 50) | (test_1['Age'] < 10)]  # rows whose Age is above 50 or below 10
midage1
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
a |
6 |
6 |
7 |
0 |
1 |
McCarthy, Mr. Timothy J |
male |
54.0 |
0 |
0 |
17463 |
51.8625 |
E46 |
S |
100 |
7 |
7 |
8 |
0 |
3 |
Palsson, Master. Gosta Leonard |
male |
2.0 |
3 |
1 |
349909 |
21.0750 |
NaN |
S |
100 |
10 |
10 |
11 |
1 |
3 |
Sandstrom, Miss. Marguerite Rut |
female |
4.0 |
1 |
1 |
PP 9549 |
16.7000 |
G6 |
S |
100 |
11 |
11 |
12 |
1 |
1 |
Bonnell, Miss. Elizabeth |
female |
58.0 |
0 |
0 |
113783 |
26.5500 |
C103 |
S |
100 |
15 |
15 |
16 |
1 |
2 |
Hewlett, Mrs. (Mary D Kingcome) |
female |
55.0 |
0 |
0 |
248706 |
16.0000 |
NaN |
S |
100 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
851 |
851 |
852 |
0 |
3 |
Svensson, Mr. Johan |
male |
74.0 |
0 |
0 |
347060 |
7.7750 |
NaN |
S |
100 |
852 |
852 |
853 |
0 |
3 |
Boulos, Miss. Nourelain |
female |
9.0 |
1 |
1 |
2678 |
15.2458 |
NaN |
C |
100 |
857 |
857 |
858 |
1 |
1 |
Daly, Mr. Peter Denis |
male |
51.0 |
0 |
0 |
113055 |
26.5500 |
E17 |
S |
100 |
869 |
869 |
870 |
1 |
3 |
Johnson, Master. Harold Theodor |
male |
4.0 |
1 |
1 |
347742 |
11.1333 |
NaN |
S |
100 |
879 |
879 |
880 |
1 |
1 |
Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) |
female |
56.0 |
0 |
1 |
11767 |
83.1583 |
C50 |
C |
100 |
126 rows × 14 columns
midage.loc[[100],['Pclass','Sex']]  # row with index label 100 (not the 100th row by position), columns Pclass and Sex
# The filtered frame keeps the index labels of the frame it came from, so labels do not reflect row position.
midage.to_csv('midage.csv')  # save the filtered frame
midage1=midage.reset_index(drop=True)  # renumber the index consecutively from 0; drop=True discards the old index instead of turning it into a column
midage1.to_csv('midage1.csv')
midage1.loc[[100],['Pclass','Sex']]  # after the reset, label 100 coincides with 0-based position 100
midage.index
Index([ 0, 1, 2, 3, 4, 8, 9, 12, 13, 14,
...
880, 881, 882, 883, 884, 885, 886, 887, 889, 890],
dtype='int64', length=576)
# Select the rows labelled 100, 105 and 108 and the Pclass, Name and Sex
# columns by label.
midage1.loc[[100,105,108],['Pclass','Name','Sex']]
|
Pclass |
Name |
Sex |
100 |
2 |
Byles, Rev. Thomas Roussel Davids |
male |
105 |
3 |
Cribb, Mr. John Hatfield |
male |
108 |
3 |
Calic, Mr. Jovo |
male |
# iloc selects by integer position instead of by label: rows 100, 105, 108
# and columns 3, 4, 5 (counting from 0), which in this frame are Pclass,
# Name and Sex.
midage1.iloc[[100,105,108],[3,4,5]]
|
Pclass |
Name |
Sex |
100 |
2 |
Byles, Rev. Thomas Roussel Davids |
male |
105 |
3 |
Cribb, Mr. John Hatfield |
male |
108 |
3 |
Calic, Mr. Jovo |
male |
# Show the first three rows of midage1.
midage1.head(3)
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
a |
0 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
100 |
1 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
100 |
2 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
100 |
# Build a 6x4 table of standard-normal random values. The row labels are the
# characters '6','3','2','4','5','1' and the column labels are 'B','D','C','A';
# both are out of sorted order, which the sorting examples below rely on.
sample = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=[*'632451'],
    columns=[*'BDCA'],
)
sample
|
B |
D |
C |
A |
6 |
-1.240358 |
-1.264760 |
0.530587 |
-0.641027 |
3 |
-0.774196 |
0.063058 |
0.908744 |
-0.111125 |
2 |
-0.743644 |
-0.142332 |
1.577287 |
0.409604 |
4 |
3.055682 |
-0.207313 |
1.322307 |
0.008019 |
5 |
1.191964 |
0.791010 |
0.306310 |
1.400323 |
1 |
0.113995 |
0.514866 |
-0.219842 |
-0.149131 |
# Sort the rows by the values in column B, ascending. axis=0 (the default)
# sorts rows; ascending=False would sort in descending order; inplace=True
# would modify `sample` itself instead of returning a sorted copy.
sample.sort_values('B')
|
B |
D |
C |
A |
6 |
-1.240358 |
-1.264760 |
0.530587 |
-0.641027 |
3 |
-0.774196 |
0.063058 |
0.908744 |
-0.111125 |
2 |
-0.743644 |
-0.142332 |
1.577287 |
0.409604 |
1 |
0.113995 |
0.514866 |
-0.219842 |
-0.149131 |
5 |
1.191964 |
0.791010 |
0.306310 |
1.400323 |
4 |
3.055682 |
-0.207313 |
1.322307 |
0.008019 |
# Sort the rows by their index labels, ascending.
sample.sort_index()
|
B |
D |
C |
A |
1 |
0.113995 |
0.514866 |
-0.219842 |
-0.149131 |
2 |
-0.743644 |
-0.142332 |
1.577287 |
0.409604 |
3 |
-0.774196 |
0.063058 |
0.908744 |
-0.111125 |
4 |
3.055682 |
-0.207313 |
1.322307 |
0.008019 |
5 |
1.191964 |
0.791010 |
0.306310 |
1.400323 |
6 |
-1.240358 |
-1.264760 |
0.530587 |
-0.641027 |
# Sort the columns by their labels, ascending (axis=1 targets columns).
sample.sort_index(axis=1)
|
A |
B |
C |
D |
6 |
-0.641027 |
-1.240358 |
0.530587 |
-1.264760 |
3 |
-0.111125 |
-0.774196 |
0.908744 |
0.063058 |
2 |
0.409604 |
-0.743644 |
1.577287 |
-0.142332 |
4 |
0.008019 |
3.055682 |
1.322307 |
-0.207313 |
5 |
1.400323 |
1.191964 |
0.306310 |
0.791010 |
1 |
-0.149131 |
0.113995 |
-0.219842 |
0.514866 |
# Sort the columns by their labels in descending order.
sample.sort_index(axis=1,ascending=False)
|
D |
C |
B |
A |
6 |
-1.264760 |
0.530587 |
-1.240358 |
-0.641027 |
3 |
0.063058 |
0.908744 |
-0.774196 |
-0.111125 |
2 |
-0.142332 |
1.577287 |
-0.743644 |
0.409604 |
4 |
-0.207313 |
1.322307 |
3.055682 |
0.008019 |
5 |
0.791010 |
0.306310 |
1.191964 |
1.400323 |
1 |
0.514866 |
-0.219842 |
0.113995 |
-0.149131 |
# Sort the rows in descending order by column B, using column A (also
# descending) to break ties.
sample.sort_values(['B','A'],ascending=False)
|
B |
D |
C |
A |
4 |
3.055682 |
-0.207313 |
1.322307 |
0.008019 |
5 |
1.191964 |
0.791010 |
0.306310 |
1.400323 |
1 |
0.113995 |
0.514866 |
-0.219842 |
-0.149131 |
2 |
-0.743644 |
-0.142332 |
1.577287 |
0.409604 |
3 |
-0.774196 |
0.063058 |
0.908744 |
-0.111125 |
6 |
-1.240358 |
-1.264760 |
0.530587 |
-0.641027 |
# Reload the training set from the relative path 'train.csv'.
df=pd.read_csv('train.csv')
# Replace the 12 column names with Chinese labels.
# NOTE(review): the label given to the SibSp column reads "number of cousins";
# the Titanic data dictionary presumably defines SibSp as siblings/spouses --
# confirm the intended wording before relying on it.
df.columns=['乘客ID','是否幸存','乘客等级(1/2/3等舱位)','乘客姓名','性别','年龄','堂兄弟/妹个数','父母与小孩个数','船票信息','票价','客舱','登船港口']
# Sort by fare (票价) first, then by age (年龄) within equal fares, both in
# descending order.
df.sort_values(['票价','年龄'],ascending=False)
|
乘客ID |
是否幸存 |
乘客等级(1/2/3等舱位) |
乘客姓名 |
性别 |
年龄 |
堂兄弟/妹个数 |
父母与小孩个数 |
船票信息 |
票价 |
客舱 |
登船港口 |
679 |
680 |
1 |
1 |
Cardeza, Mr. Thomas Drake Martinez |
male |
36.0 |
0 |
1 |
PC 17755 |
512.3292 |
B51 B53 B55 |
C |
258 |
259 |
1 |
1 |
Ward, Miss. Anna |
female |
35.0 |
0 |
0 |
PC 17755 |
512.3292 |
NaN |
C |
737 |
738 |
1 |
1 |
Lesurer, Mr. Gustave J |
male |
35.0 |
0 |
0 |
PC 17755 |
512.3292 |
B101 |
C |
438 |
439 |
0 |
1 |
Fortune, Mr. Mark |
male |
64.0 |
1 |
4 |
19950 |
263.0000 |
C23 C25 C27 |
S |
341 |
342 |
1 |
1 |
Fortune, Miss. Alice Elizabeth |
female |
24.0 |
3 |
2 |
19950 |
263.0000 |
C23 C25 C27 |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
481 |
482 |
0 |
2 |
Frost, Mr. Anthony Wood "Archie" |
male |
NaN |
0 |
0 |
239854 |
0.0000 |
NaN |
S |
633 |
634 |
0 |
1 |
Parr, Mr. William Henry Marsh |
male |
NaN |
0 |
0 |
112052 |
0.0000 |
NaN |
S |
674 |
675 |
0 |
2 |
Watson, Mr. Ennis Hastings |
male |
NaN |
0 |
0 |
239856 |
0.0000 |
NaN |
S |
732 |
733 |
0 |
2 |
Knight, Mr. Robert J |
male |
NaN |
0 |
0 |
239855 |
0.0000 |
NaN |
S |
815 |
816 |
0 |
1 |
Fry, Mr. Richard |
male |
NaN |
0 |
0 |
112058 |
0.0000 |
B102 |
S |
891 rows × 12 columns
# 4x4 table of standard-normal random values with row labels '3','2','4','1'
# and column labels 'B','D','C','A'.
x = pd.DataFrame(
    data=np.random.randn(4, 4),
    index=[*'3241'],
    columns=[*'BDCA'],
)
x
|
B |
D |
C |
A |
3 |
0.207853 |
0.574396 |
0.086197 |
1.187164 |
2 |
-0.794598 |
-0.308106 |
-0.291090 |
-0.150375 |
4 |
0.215895 |
-0.189428 |
0.556125 |
-0.361963 |
1 |
-1.593097 |
-0.205176 |
1.427471 |
-0.339048 |
# 5x4 table of standard-normal random values with row labels
# '5','3','2','4','1' and column labels 'B','D','C','E'.
y = pd.DataFrame(
    data=np.random.randn(5, 4),
    index=[*'53241'],
    columns=[*'BDCE'],
)
y
|
B |
D |
C |
E |
5 |
0.345871 |
-1.007209 |
-0.047450 |
-0.867648 |
3 |
-1.674230 |
0.874186 |
1.965207 |
-0.291205 |
2 |
1.802405 |
-0.033730 |
1.164470 |
-1.460408 |
4 |
0.501799 |
0.024054 |
-2.140898 |
-0.611685 |
1 |
-1.486755 |
1.780159 |
-2.015993 |
0.259121 |
# Element-wise addition aligns the two frames on both row and column labels.
# Only cells whose row AND column label exist in both frames are summed;
# every other cell of the combined label set becomes NaN (columns A and E
# and row 5 in the output below).
x+y
|
A |
B |
C |
D |
E |
1 |
NaN |
-3.079853 |
-0.588522 |
1.574983 |
NaN |
2 |
NaN |
1.007806 |
0.873381 |
-0.341837 |
NaN |
3 |
NaN |
-1.466377 |
2.051404 |
1.448582 |
NaN |
4 |
NaN |
0.717694 |
-1.584773 |
-0.165374 |
NaN |
5 |
NaN |
NaN |
NaN |
NaN |
NaN |
# Largest combined count per passenger: add the two columns element-wise,
# then take the maximum with the vectorized Series.max() (which skips NaN)
# rather than iterating the Series through the Python builtin max().
(df['堂兄弟/妹个数']+df['父母与小孩个数']).max()
10
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of df.
df.describe()
|
乘客ID |
是否幸存 |
乘客等级(1/2/3等舱位) |
年龄 |
堂兄弟/妹个数 |
父母与小孩个数 |
票价 |
count |
891.000000 |
891.000000 |
891.000000 |
714.000000 |
891.000000 |
891.000000 |
891.000000 |
mean |
446.000000 |
0.383838 |
2.308642 |
29.699118 |
0.523008 |
0.381594 |
32.204208 |
std |
257.353842 |
0.486592 |
0.836071 |
14.526497 |
1.102743 |
0.806057 |
49.693429 |
min |
1.000000 |
0.000000 |
1.000000 |
0.420000 |
0.000000 |
0.000000 |
0.000000 |
25% |
223.500000 |
0.000000 |
2.000000 |
20.125000 |
0.000000 |
0.000000 |
7.910400 |
50% |
446.000000 |
0.000000 |
3.000000 |
28.000000 |
0.000000 |
0.000000 |
14.454200 |
75% |
668.500000 |
1.000000 |
3.000000 |
38.000000 |
1.000000 |
0.000000 |
31.000000 |
max |
891.000000 |
1.000000 |
3.000000 |
80.000000 |
8.000000 |
6.000000 |
512.329200 |
from matplotlib import pyplot as plt
# Plot a histogram of the age column (年龄).
plt.hist(df['年龄'])
(array([ 54., 46., 177., 169., 118., 70., 45., 24., 9., 2.]),
array([ 0.42 , 8.378, 16.336, 24.294, 32.252, 40.21 , 48.168, 56.126,
64.084, 72.042, 80. ]),
<BarContainer object of 10 artists>)
# Summary statistics for the fare column (票价).
df['票价'].describe()
count 891.000000
mean 32.204208
std 49.693429
min 0.000000
25% 7.910400
50% 14.454200
75% 31.000000
max 512.329200
Name: 票价, dtype: float64
# Plot a histogram of the fare column (票价).
plt.hist(df['票价'])
(array([732., 106., 31., 2., 11., 6., 0., 0., 0., 3.]),
array([ 0. , 51.23292, 102.46584, 153.69876, 204.93168, 256.1646 ,
307.39752, 358.63044, 409.86336, 461.09628, 512.3292 ]),
<BarContainer object of 10 artists>)
# Summary statistics for the parents/children count column (父母与小孩个数).
df['父母与小孩个数'].describe()
count 891.000000
mean 0.381594
std 0.806057
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 6.000000
Name: 父母与小孩个数, dtype: float64
# Plot a histogram of the parents/children count column (父母与小孩个数).
plt.hist(df['父母与小孩个数'])
(array([678., 118., 0., 80., 0., 5., 4., 0., 5., 1.]),
array([0. , 0.6, 1.2, 1.8, 2.4, 3. , 3.6, 4.2, 4.8, 5.4, 6. ]),
<BarContainer object of 10 artists>)
标签:...,
False,
NaN,
第一节,
第一章,
载入,
female,
Mr,
male
From: https://www.cnblogs.com/liuyunhan/p/18069329