import pandas as pd
# Read the catering sales workbook into a DataFrame.
df = pd.read_excel(
    r"C:\Users\ying\Desktop\catering_sale.xls"
)
# Print the summary statistics that DataFrame.describe() computes.
sales_summary = df.describe()
print(sales_summary)
# Output of print(df.describe()):
#               销量
# count   200.000000
# mean   2755.214700
# std     751.029772
# min      22.000000
# 25%    2451.975000
# 50%    2655.850000
# 75%    3026.125000
# max    9106.440000
#
# 箱型图 (box plot):
import matplotlib.pyplot as plt
# Box plot of the sales data, used to spot outliers.

# Use the SimHei font so Chinese text renders, and keep the ASCII minus sign
# so negative tick labels display correctly. Setting each option once is
# sufficient.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

plt.figure()
# return_type='dict' returns the matplotlib artists keyed by role, which lets
# the outlier ("fliers") coordinates be read back from the plot.
p = df.boxplot(return_type='dict')
x = p['fliers'][0].get_xdata()
# Sort a copy in ascending order; sorting the returned array in place could
# reorder the data held by the plotted artist itself.
y = p['fliers'][0].get_ydata().copy()
y.sort()
plt.title("异常值检测箱型图3136",fontsize=50)

# NOTE: x and y are the outlier coordinates, intended for labelling the points
# with plt.annotate. Labels of nearby points overlap and become unreadable, so
# the text offsets have to be tuned for each data set. No annotate call is
# present in this script.

# Show the box plot on its own before any later plot is drawn.
plt.show()
# Histogram of sales, plus a frequency table over 12 equal-width bins.

# Start a new figure so the histogram is not drawn on top of whatever axes
# happen to be current.
plt.figure()
df["销量"].hist(bins=12)
plt.title("概率分布直方图3136",fontsize=50)

# right=False makes every bin left-closed / right-open: [a, b).
gcut = pd.cut(df["销量"], 12, right=False)
# sort=False keeps the bins in interval order instead of ordering by count.
gcut_count = gcut.value_counts(sort=False)
# A bare expression only displays in a notebook; print so a script shows it.
print(gcut_count)
# Output of gcut_count:
# [22.0, 779.037)           3
# [779.037, 1536.073)       1
# [1536.073, 2293.11)      19
# [2293.11, 3050.147)     133
# [3050.147, 3807.183)     40
# [3807.183, 4564.22)       2
# [4564.22, 5321.257)       0
# [5321.257, 6078.293)      0
# [6078.293, 6835.33)       1
# [6835.33, 7592.367)       0
# [7592.367, 8349.403)      0
# [8349.403, 9115.524)      1
# Name: 销量, dtype: int64
# Bar chart of the per-bin counts held in gcut_count.

# Name the column explicitly: pandas >= 2.0 names the value_counts() result
# "count", so relying on the inherited name would make r_zj["销量"] fail.
r_zj = gcut_count.rename("销量").to_frame()

# Series.plot reuses the current axes when a figure is already open, so open
# a fresh figure to keep the bars off any earlier plot.
plt.figure()
r_zj["销量"].plot(
    kind='bar',
    width=0.8,
    figsize=(24, 2),
    rot=0,          # keep the interval labels horizontal
    color='k',
    grid=True,
    alpha=0.5,
    title='销量频率直方图3136',
)
# coding=utf-8
import matplotlib.pyplot as plt
# Line chart of sales against date on a 20x10 inch, 100 dpi figure.
plt.figure(dpi=100, figsize=(20, 10))
dates = df['日期']
sales = df['销量']
plt.plot(dates, sales)
plt.title("时序图3136", fontsize=50)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Plot y = sin(x) at 25 evenly spaced points on [0, 2*pi] (endpoint included).
x = np.linspace(0, 2 * np.pi, 25, endpoint=True)
s = np.sin(x)
plt.figure()
plt.plot(x, s, 'b-*')  # blue solid line with star markers
plt.title("y=Sin(x)3136", fontdict={'size': 50})
# legend() expects a sequence of labels; a bare string would be iterated
# character by character, labelling the curve just "s".
plt.legend(["sin(x)"])
plt.show()
# 代码3-3 捞起生鱼片的季度销售情况
import pandas as pd
import numpy as np
#catering_sale = r"C:\Users\ying\Desktop\catering_fish_congee.xls" # 餐饮数据
# Bin the sales figures into 500-wide intervals and compute the percentage of
# rows that falls into each interval.

# names= only replaces the column labels with 'date' and 'sale'; no column is
# set as the index here.
data = pd.read_excel(r"C:\Users\ying\Desktop\catering_fish_congee.xls",names=['date','sale'])

bins = [0,500,1000,1500,2000,2500,3000,3500,4000]
labels = ['[0,500)','[500,1000)','[1000,1500)','[1500,2000)',
          '[2000,2500)','[2500,3000)','[3000,3500)','[3500,4000)']
# right=False makes the bins left-closed / right-open so they match the
# "[a,b)" labels; the pd.cut default is right-closed "(a, b]".
data['sale分层'] = pd.cut(data.sale, bins, labels=labels, right=False)

# observed=False keeps bins with no rows in the result and states the choice
# explicitly rather than relying on the pandas default.
aggResult = data.groupby(by=['sale分层'], observed=False)['sale'].agg([('sale', np.size)])
# Share of each bin, rounded to two decimals and expressed as a percentage.
pAggResult = round(aggResult / aggResult.sum(), 2) * 100
import matplotlib.pyplot as plt
# Bar chart of the per-bin percentages stored in pAggResult['sale'].
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese text
plt.figure(figsize=(10, 6))  # figure size in inches
bin_share = pAggResult['sale']
bin_share.plot(kind='bar', width=0.8, fontsize=10)
plt.title('季度销售额频率分布直方图', fontsize=50)
plt.show()
import pandas as pd
# Read the per-department sales workbook and print its summary statistics.
df1 = pd.read_excel(
    r"C:\Users\ying\Desktop\data\dish_sale.xls"
)
dept_summary = df1.describe()
print(dept_summary)
# coding=utf-8
import matplotlib.pyplot as plt
# One line per department column, plotted against the month column, using a
# solid line with star markers.
plt.figure(dpi=100, figsize=(20, 10))
months = df1['月份']
for dept_column in ('A部门', 'B部门', 'C部门'):
    plt.plot(months, df1[dept_column], '-*')
plt.title('多组线图3136', fontsize=50)
plt.show()
# Output of print(df1.describe()):
#              A部门        B部门        C部门
# count  12.000000  12.000000  12.000000
# mean    6.370833   7.112500   5.616667
# std     0.677864   0.734886   0.332575
# min     5.300000   5.400000   5.200000
# 25%     6.007500   6.637500   5.375000
# 50%     6.250000   7.300000   5.550000
# 75%     6.625000   7.550000   5.825000
# max     8.000000   8.000000   6.200000
#累计概率分布直方图
# Cumulative relative-frequency curve of 10,000 standard-normal samples.

sample_size = 10000
# Draw all samples in one vectorised call instead of one call per iteration.
example_list = np.random.normal(size=sample_size).tolist()

width = 50  # number of histogram bins
# Only the bin counts and edges are needed, so compute them directly rather
# than drawing a histogram and clearing the figure afterwards.
n, bin_edges = np.histogram(example_list, bins=width)

# Left edge of every bin shifted by half of the first bin's width.
X = bin_edges[0:width] + (bin_edges[1] - bin_edges[0]) / 2.0
bins = bin_edges.tolist()

# Relative frequency per bin, then its running total.
freq = (n / n.sum()).tolist()
acc_freq = np.cumsum(freq).tolist()

plt.figure()
plt.plot(X, acc_freq, color='r')  # cumulative probability curve

# Capture the current y tick locations; they are relabelled further below.
yt = plt.yticks()
yt1 = yt[0].tolist()
def to_percent(temp, position=0):
    """Format a fraction as a whole-number percentage string.

    Args:
        temp: Numeric fraction, e.g. 0.5 for fifty percent.
        position: Unused. Presumably kept so the function matches the
            ``(value, position)`` signature of a tick-formatter callback.

    Returns:
        ``100 * temp`` rendered with no decimals and a trailing ``%``.
    """
    return '%1.0f' % (100 * temp) + '%'
# Relabel the captured y tick locations as percentages, clamp the y axis to
# [0, 1], then title and display the figure.
ytk1 = list(map(to_percent, yt1))
plt.yticks(yt1, ytk1)
plt.ylim(0, 1)
plt.title('累计概率分布直方图3136', fontsize=50)
plt.show()
# Histogram of 10,000 standard-normal samples, overlaid with a line through
# the bin midpoints and a degree-7 polynomial fit of the bin counts.

sample_size = 10000
# Draw all samples in one vectorised call instead of one call per iteration.
example_list = np.random.normal(size=sample_size).tolist()

width = 100  # number of histogram bins
plt.figure()  # fresh figure so the bars do not land on an earlier plot
n, bins, patches = plt.hist(example_list, bins=width, color='blue', alpha=0.5)

# Left edge of every bin shifted by half of the first bin's width.
X = bins[0:width] + (bins[1] - bins[0]) / 2.0
Y = n

# Build 9 evenly spaced y tick positions reaching slightly above the tallest
# bar, and label each with the count divided by the total (a frequency).
maxn = max(n)
maxn1 = int(maxn % 8 + maxn + 8 * 2)
ydata = list(range(0, maxn1 + 1, maxn1 // 8))
total = n.sum()  # computed once rather than once per tick
yfreq = [str(i / total) for i in ydata]

plt.plot(X, Y, color='green')  # line through the bin midpoints
# polyfit returns the coefficients from the highest power down to the constant.
p1 = np.polyfit(X, Y, 7)
Y1 = np.polyval(p1, X)
plt.plot(X, Y1, color='red')

plt.xlim(-2.5, 2.5)
plt.ylim(0)
# With this call the y axis shows frequencies; without it, raw counts.
plt.yticks(ydata, yfreq)
plt.legend(['midpoint', 'fitting'], ncol=1, frameon=False)
plt.title('概率分布直方图3136', fontsize=50)
plt.show()
import math
import matplotlib.pyplot as plt
import numpy as np
# Simple plot of logarithm curves with bases 1.5, 2 and 3.
if __name__ == '__main__':
    # Sample x from 0.05 up to (but excluding) 3 in steps of 0.05.
    x = np.arange(0.05, 3, 0.05)
    print(x)

    y1 = [math.log(a, 1.5) for a in x]
    plt.plot(x, y1, linewidth=2, color="#007500", label='log1.5(x)')
    # Dashed red vertical line at x = 1, spanning the range of the first curve.
    plt.plot([1, 1], [y1[0], y1[-1]], "r--", linewidth=2)

    y2 = [math.log(a, 2) for a in x]
    plt.plot(x, y2, linewidth=2, color="#9F35FF", label="log2(x)")

    y3 = [math.log(a, 3) for a in x]
    plt.plot(x, y3, linewidth=2, color="#F75000", label="log3(x)")

    plt.legend(loc="lower right")
    plt.grid(True)
    plt.title('对数图3136', fontsize=50)
    plt.show()
import seaborn as sns
import pandas as pd
# Scatter-plot matrix of the A and B department columns.
df1 = pd.read_excel(r"C:\Users\ying\Desktop\data\dish_sale.xls")
sns.pairplot(data=df1, vars=['A部门', 'B部门'])
# Title the whole figure; plt.title would only title the current subplot of
# the grid.
plt.suptitle('散点矩阵图3136', fontsize=50)
plt.show()
# 参数说明:
# data指定pairplot()要用到的数据源,hue指定将data中的数据区分显示的依据
# vars指定data中要绘制成散点矩阵图的数据
##模块导入
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#plt.rcParams["font.family"] = 'Arial Unicode MS'##防止在matplotlib中中文不显示
##数据准备
# Pareto chart: profit per row as descending bars, with the cumulative share
# of total profit as a line on a second y axis.

# Data preparation
df = pd.read_excel(r"C:\Users\ying\Desktop\data\catering_dish_profit.xls")
data = df['盈利'].copy()

# Sort the copy in place, largest profit first.
data.sort_values(ascending=False, inplace=True)
# Running total divided by the grand total gives the cumulative share.
p = data.cumsum() / data.sum()
# Index label of the first row whose cumulative share exceeds 0.8 ...
key = p[p > 0.8].index[0]
# ... and that row's position within the sorted data.
key_num = data.index.tolist().index(key)

# Plotting
plt.figure(figsize=(60, 40))
ax = data.plot(kind='bar', color='g', alpha=0.9, width=0.4, rot=0)
# Label the profit axis through its own handle so a later label call cannot
# overwrite it.
ax.set_ylabel('盈利(元)', fontsize=50)
ax.set_title('帕累托图3136', fontsize=80)

# Bars sit at positions 0..n-1, so plot the cumulative share at the same
# positions (not at the index labels) to keep the line aligned with the bars.
ax_ratio = ax.twinx()
ax_ratio.plot(range(len(p)), p.values, '--ko')
ax_ratio.set_ylabel('盈利(比例)', fontsize=50)

# Dashed red marker at the position where the share first exceeds 0.8.
ax_ratio.axvline(key_num, color='r', linestyle="--", alpha=0.8)
ax_ratio.text(key_num + 0.2, p.loc[key] - 0.05,
              '累计占比为:%.3f%%' % (p.loc[key] * 100), color='r')

# Show the chart before any later plot is drawn.
plt.show()
import matplotlib.pyplot as plt #导入绘图包
# Pie chart of the A department column, one slice per month.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese text
plt.rcParams['axes.unicode_minus'] = False    # keep the ASCII minus sign

# Open a fresh figure so the pie is not drawn into whatever axes are current.
plt.figure()
# Slices are labelled with the month column; autopct prints each slice's
# percentage with one decimal place.
plt.pie(df1['A部门'], labels=df1['月份'], autopct='%3.1f%%')
plt.title('A部门每月销售额饼图3136',fontsize=50)
plt.show()
# 标签 (tags): 分析, plot, plt, python, sale, 绘图, fontsize, data, bins
# From: https://www.cnblogs.com/----zcy88888/p/17155259.html