随堂练习,随手记录
2.ipynb
#%%
import numpy as np
import csv
#%%
# Read the CSV file and keep every data row (a list of strings per row).
# NOTE(review): the path literal looks like a placeholder -- point it at the
# real CSV file before running.
# newline='' is what the csv module asks for when a file object is handed to
# csv.reader, so that newlines embedded in quoted fields are parsed correctly.
with open(r'文件地址', newline='') as csvfile:
    csv_reader = csv.reader(csvfile)
    # Consume the first row (the header) so that only data rows remain;
    # the None default avoids a StopIteration on an empty file.
    birth_header = next(csv_reader, None)
    iris_data = list(csv_reader)
print(iris_data)
#%%
# One tuple per record, made from every field except the first one
# (presumably a row-number column -- verify against the CSV).
iris_list = [tuple(record[1:]) for record in iris_data]
iris_list
#%%
# Structured dtype describing one record: five named fields, each stored as
# a Unicode string of at most 40 characters.
field_names = (
    "Sepal.Length",
    "Sepal.Width",
    "Petal.Length",
    "Petal.Width",
    "Species",
)
datatype = np.dtype([(field, np.str_, 40) for field in field_names])
print(datatype)
#%%
# Turn the list of record tuples into a structured array.
# This rebinds the name iris_data to the new array.
iris_data = np.array(iris_list, dtype=datatype)
iris_data
#%%
# Select the "Petal.Length" field (stored as text) and convert it to floats.
petal_length_text = iris_data['Petal.Length']
PetalLength = petal_length_text.astype(float)
PetalLength
#%%
# Sorted copy of the values (PetalLength itself is left untouched).
np.sort(PetalLength)
#%%
# Distinct values, in ascending order.
np.unique(PetalLength)
#%%
# Sum of all values.
PetalLength.sum()
#%%
# Arithmetic mean.
PetalLength.mean()
#%%
# Standard deviation (NumPy default: ddof=0).
PetalLength.std()
#%%
# Variance (NumPy default: ddof=0).
PetalLength.var()
#%%
# Smallest value.
PetalLength.min()
#%%
# Largest value.
PetalLength.max()
3.ipynb
#%%
import numpy as np
import pandas as pd
# A Series holding the integers 1 through 9 with the default integer index.
s = pd.Series(list(range(1, 10)))
print(s)
#%%
# 10 x 4 table of random integers drawn from [1, 20).
df = pd.DataFrame(np.random.randint(1, 20, size=(10, 4)))
print(df)
#%%
# First row, selected by position.
temp1 = df.iloc[0]
print(temp1)
#%%
# The column labelled 0.
print(df.loc[:, 0])
#%%
# First three rows.
first_rows = df.head(3)
print(first_rows)
#%%
# Last three rows.
last_rows = df.tail(3)
print(last_rows)
#%%
# Rows at positions 1 and 2 (the end of the slice is exclusive).
print(df.iloc[1:3])
#%%
# Single cell: row label 1, column label 1.
print(df.at[1, 1])
#%%
# Keep only the rows whose value in column 1 is positive.
positive_mask = df[1] > 0
print(df[positive_mask])
#%%
# Append a new column named 'add' holding 0..9, one value per row.
df['add'] = list(range(10))
print(df)
4.ipynb
#%%
import pandas as pd
# Column-oriented records: student name, class number and score.
# The key is spelled 'student' (it was 'sudent') so that it matches the
# column name requested in the next cell.
data = {
    'student': ['zhao', 'qian', 'sun', 'li', 'zhou', 'wu', 'zheng', 'wang'],
    'class': [1, 2, 2, 3, 3, 4, 1, 5],
    'score': [90, 80, 85, 75, 95, 70, 70, 85],
}
df = pd.DataFrame(data)
print(df)
print(df.columns)
#%%
# Same data with an explicit column order. A name listed in `columns` that is
# not a key of `data` would produce a column filled with NaN, which is why the
# spelling of the key above matters.
pd.DataFrame(data, columns=['class', 'student', 'score'])
#%%
# A column can be read by key or, when its name is a valid identifier,
# as an attribute.
class_column = df['class']
score_column = df.score
print(class_column)
print(score_column)
#%%
# Row labels of the frame.
print(df.index)
#%%
# Assigning a Series aligns on index labels: rows labelled 2, 3 and 4 receive
# 10, 9 and 8; rows whose label is absent from val get NaN.
val = pd.Series([10, 9, 8], index=[2, 3, 4])
df['year'] = val
print(df)
#%%
# Flag the rows whose 'year' value is missing.
df['isnull'] = df['year'].isna()
print(df)
#%%
# Remove the helper column again.
df.drop(columns='isnull', inplace=True)
print(df)
5.ipynb
#%%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
%matplotlib inline
#%%
# Load the tips CSV file and preview the first rows.
fdata = pd.read_csv(r'E:\Job\code\BigData\chapter05\tips.csv')
fdata.head()
#%%
# Summary statistics of the numeric columns.
fdata.describe()
#%%
# Replace the English column names with Chinese labels, in place.
column_labels = {
    'total_bill': '消费总额',
    'tip': '小费',
    'sex': '性别',
    'smoker': '是否抽烟',
    'day': '星期',
    'time': '聚餐时间段',
    'size': '人数',
}
fdata.rename(columns=column_labels, inplace=True)
fdata.head()
#%%
# Scatter plot of tip ('小费') against total bill ('消费总额').
fdata.plot.scatter(x='消费总额', y='小费')
#%%
# Mean tip for each value of the sex column ('性别').
tips_by_sex = fdata.groupby('性别')['小费']
tips_by_sex.mean()
#%%
# Mean tip per day ('星期'), drawn as a bar chart.
# r is a Series, so the bars come from its index and values. The x=/y=
# keywords are DataFrame options that Series.plot ignores, so they are dropped.
print(fdata['星期'].unique())
r = fdata.groupby('星期')['小费'].mean()
# Series.plot returns a matplotlib Axes (despite the variable name).
fig = r.plot(kind='bar', fontsize=12, rot=30)
# NOTE(review): no title text is set in this cell, so the size change below
# only matters if a title is added.
fig.axes.title.set_size(16)
#%%
# Mean tip per (sex, smoker) combination.
r = fdata.groupby(['性别', '是否抽烟'])['小费'].mean()
fig = r.plot(kind='bar', fontsize=12, rot=30)
fig.axes.title.set_size(16)
#%%
# Mean tip per dining time slot ('聚餐时间段').
r = fdata.groupby(['聚餐时间段'])['小费'].mean()
fig = r.plot(kind='bar', fontsize=15, rot=30)
fig.axes.title.set_size(16)
标签:数据分析,iris,#%%,df,随堂,np,fdata,可视化,print
From: https://www.cnblogs.com/IvanKK/p/17936768