首页 > 编程语言 >第一周 python数据分析与挖掘技术实战 第三章

第一周 python数据分析与挖掘技术实战 第三章

时间:2023-02-26 20:44:06 浏览次数:47  
标签:数据分析 plt 第一周 python title sale print np data

总结 

.............

 

图3-1

 

# Figure 3-1: print summary statistics of the catering sales sheet, then draw
# a sine-curve demo plot.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  # required: plt is used below and was never imported

catering_sale = 'catering_sale.xls'
# Use the '日期' column of the sheet as the row index.
data = pd.read_excel(catering_sale, index_col=u'日期')
print(data.describe())

plt.title('3150')
# Sample points from 0.2*pi up to and including 25 (linspace's default sample count).
x = np.linspace(0.2 * np.pi, 25, endpoint=True)
y = np.sin(x)
plt.plot(x, y, 'bp-')
plt.show()

 

 

 

 

 

 

 

 

图3-2

# Figure 3-2: report missing values in the catering sales sheet and draw a box
# plot of the remaining data with every outlier labelled.
import matplotlib.pyplot as plt  # plotting library
from numpy import nan as NA
import pandas as pd
plt.title('3150')
# Load the data source.
xlsFilename = "catering_sale.xls"
df = pd.read_excel(xlsFilename)

# --- Missing values ---
# Number of missing values per column.
missingNumCountDf = df.isnull().sum()
print("\n缺失值个数")
print(missingNumCountDf)
# Missing rate in percent. The denominator is the total row count: df.count()
# excludes NaN, so dividing by it would give missing/non-missing instead.
missingPercentDf = 100 * (missingNumCountDf / len(df))
print("\n缺失率")
print(missingPercentDf)
# Rows whose second column is missing.
missingDf = df[df.iloc[:, 1].isnull()]
print("\n缺失记录")
print(missingDf)
# Drop rows that contain missing values.
df2 = df.dropna()

# --- Box plot ---
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
# Box plot of the second column (presumably the sales figures — confirm against the sheet).
bp = plt.boxplot(df2.values[:, 1], patch_artist=True)

fliers = bp['fliers']
for fly in fliers:
    data = fly.get_data()
    # Write each outlier's value next to its marker.
    for (x, y) in fly.get_xydata():
        plt.text(x, y, "%.2f" % y, verticalalignment="top", horizontalalignment='right')

    print("\n异常值")
    print(data[1])  # second element of get_data(): the y-values of the outliers

plt.show()

 

 

 图3-3

# Figure 3-3: frequency-distribution bar chart of sales binned into
# 500-wide intervals.
import pandas as pd
import numpy as np
# Raw string: the path contains backslashes ('\p' is an invalid escape in a
# normal string literal); the resulting path text is unchanged.
catering_sale = r'D:\python\挖掘学习实训/catering_fish_congee.xls'  # catering data
# Read the sheet and rename its columns to 'date' and 'sale'.
data = pd.read_excel(catering_sale, names=['date', 'sale'])

bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
labels = ['[0,500)', '[500,1000)', '[1000,1500)', '[1500,2000)',
          '[2000,2500)', '[2500,3000)', '[3000,3500)', '[3500,4000)']

# right=False makes each bin left-closed / right-open so the binning matches
# the '[a,b)' labels (pd.cut's default is right-closed '(a,b]').
data['sale分层'] = pd.cut(data.sale, bins, labels=labels, right=False)
# observed=False keeps empty bins in the result, so every interval gets a bar.
aggResult = data.groupby('sale分层', observed=False).agg({'sale': 'count'})

# Share of records per bin, as a percentage rounded to whole percents.
pAggResult = round(aggResult / aggResult.sum(), 2) * 100

import matplotlib.pyplot as plt
plt.figure(figsize=(10, 6))  # figure size
pAggResult['sale'].plot(kind='bar', width=0.8, fontsize=10)  # frequency bar chart
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.title('3150\n季度销售额频率分布直方图', fontsize=20)
plt.show()

 

 

图3-4

# Figure 3-4: pie chart of each dish's share of total profit.
import pandas as pd
import matplotlib.pyplot as plt

plt.title('学号:50')
# Raw string: the path contains backslashes ('\p' is an invalid escape in a
# normal string literal); the resulting path text is unchanged.
dish_profit = r'D:\python\挖掘学习实训/catering_dish_profit.xls'  # dish profit data
data = pd.read_excel(dish_profit, index_col=u'菜品名')
data = data[u'盈利'].copy()
# Sort by profit, largest first. The sorted Series must be assigned back:
# the sort methods return a new object and leave `data` untouched.
data = data.sort_values(ascending=False)
# Each dish's fraction of the total; the total is computed once.
total = data.sum()
sizes = [value / total for value in data]
colors = ['red', 'green']
plt.pie(sizes, labels=data.index, colors=colors, autopct='%1.1f%%', shadow=True, startangle=90)
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()

 

 

图3-8

# Figure 3-8: Pareto chart of dish profit — bars for profit per dish plus a
# cumulative-share line on a secondary y axis.
from __future__ import print_function
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign '-' correctly

# Raw string: the path contains backslashes ('\p' is an invalid escape in a
# normal string literal); the resulting path text is unchanged.
dish_profit = r'D:\python\挖掘学习实训/catering_dish_profit.xls'
data = pd.read_excel(dish_profit, index_col='菜品名')
data = data['盈利'].copy()
# A Pareto chart needs the values in descending order. The sorted Series must
# be assigned back: the sort methods return a new object.
data = data.sort_values(ascending=False)

plt.figure()
data.plot(kind='bar')
plt.ylabel('盈利(元)')
# Cumulative share of total profit after each dish.
p = 1.0 * data.cumsum() / data.sum()
p.plot(color='r', secondary_y=True, style='-o', linewidth=2)
# Annotate the 7th point (position 6), which the original comment describes as
# the 85% mark; .iloc makes the positional lookup explicit because the Series
# is indexed by dish name.
mark_pos = 6
mark_val = p.iloc[mark_pos]
plt.annotate(
    format(mark_val, '.4%'), xy=(mark_pos, mark_val),
    xytext=(mark_pos * 0.9, mark_val * 0.9),
    arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=.2')
)  # annotation with an explicit arrow style
plt.ylabel('盈利(比例)')
plt.title('学号:3150')
plt.show()

 

 

图3-16 3-17

# Figure 3-16: sin(x) sampled at 50 evenly spaced points on [0, 2*pi], drawn
# with the "bp--" format string.
x = np.linspace(0, 2 * np.pi, 50)
y = np.sin(x)
plt.plot(x, y, "bp--")
plt.title('学号:3150')
plt.show()

# Figure 3-17: pie chart.
# Slices are ordered and plotted counter-clockwise.
# One row per slice: (label, size, colour, explode offset).
slice_table = [
    ("frogs", 15, "yellowgreen", 0),
    ("hogs", 30, "gold", 0.1),
    ("dogs", 45, "lightskyblue", 0),
    ("logs", 10, "lightcoral", 0),
]
labels = [row[0] for row in slice_table]
sizes = [row[1] for row in slice_table]
colors = [row[2] for row in slice_table]
explode = tuple(row[3] for row in slice_table)
pie_options = dict(
    explode=explode,
    labels=labels,
    colors=colors,
    autopct="%1.1f%%",
    shadow=True,
    startangle=45,
)
plt.pie(sizes, **pie_options)
plt.axis("equal")
plt.title('学号:3150')
plt.show()

 

 

图3-18  3-19

# Figure 3-18: histogram of 1000 standard-normal samples split into 10 bins.
x = np.random.randn(1000)
plt.hist(x, bins=10)
plt.title('学号:3150')
plt.show()

# Figure 3-19: box plot of two columns — a fresh sample and the same sample
# shifted up by 1.
x = np.random.randn(1000)
D = pd.DataFrame(np.column_stack([x, x + 1]))
D.plot.box()
plt.title('学号:3150')
plt.show()

 

 

图3-20

# Figure 3-20: the same exponential series drawn twice side by side — once on
# a linear y axis and once on a logarithmic y axis.
x = pd.Series(np.exp(np.arange(20)))
fig, axs = plt.subplots(1, 2, figsize=(8, 4))
linear_ax, log_ax = axs
x.plot(ax=linear_ax, label=u"原始数据图", legend=True)
x.plot(ax=log_ax, label=u"对数数据图", legend=True, logy=True)
plt.title('学号:3150')
plt.show()

 

标签:数据分析,plt,第一周,python,title,sale,print,np,data
From: https://www.cnblogs.com/gfl411050509/p/17157589.html

相关文章

  • 数据分析
    importnumpyasnpimportpandasaspdimportmatplotlib.pyplotaspltpath='./data/catering_sale.xls'data=pd.read_excel(path,index_col=u'日期')#读取......
  • python数据分析画图
    importnumpyasnpimportpandasaspdimportmatplotlib.pyplotaspltpath='./data/catering_sale.xls'data=pd.read_excel(path,index_col=u'日期')#读取......
  • python数据分析
    -*-coding:utf-8-*-#代码3-1使用describe()方法即可查看数据的基本情况importpandasaspdcatering_sale='../data/catering_sale.xls'#餐饮数据data=p......
  • python数据分析
    #-*-coding:utf-8-*-"""SpyderEditorThisisatemporaryscriptfile."""importpandasaspdfrommatplotlibimportpyplotasplt##读取数据url=r"F:\data\ca......
  • 应用Python进行统计数据画图
     画饼图:#-*-coding:utf-8-*-"""SpyderEditorThisisatemporaryscriptfile."""importpandasaspdimportnumpyasnp#importseabornasimportmatplotli......
  • [oeasy]python0094_视频游戏_双人网球_pong_atari_mos_6502_雅达利_米洛华
    编码进化回忆上次内容上次我们回顾了微软之前的比尔盖茨和保罗艾伦mits迎来的是帮手还是隐患?intel-8080遇到了mos-6502底层硬件驱动游戏行业......
  • Turtlebot4入门教程-演示-创建节点(Python)
    说明:本教程将介绍创建ROS2包和用Python编写ROS2节点的步骤。​​有关C++示例,请单击此处​​。这些步骤与 ​​ROS2教程类似​​​,但侧重于与TurtleBot4的交互......
  • 「Python实用秘技13」Python中临时文件的妙用
    本文完整示例代码及文件已上传至我的Github仓库https://github.com/CNFeffery/PythonPracticalSkills这是我的系列文章「Python实用秘技」的第12期,本系列立足于笔者......
  • 数据挖掘python 画各类图
    ##-*-coding:utf-8-*-#代码3-1使用describe()方法即可查看数据的基本情况importpandasaspdcatering_sale='D://人工智能//catering_sale.xls'#餐饮数据......
  • python基础-json
    importjson#准备列表,列表内每一个元素都是字典,将其转为JSONdate=[{"name":"张大帅","age":11},{"name":"王大锤","age":13},{"name":"赵......