# -*- coding: utf-8 -*-
"""Explore the airline membership data set.

Reads the member records from ``datafile``, writes a per-column summary
(missing-value count, maximum, minimum) to ``resultfile``, and then draws
the customer-profile charts: enrolment-year histogram, gender pie chart,
membership-tier bar chart and age box plot.
"""

from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd

datafile = 'C:/Users/admin/Desktop/air_data.csv'
resultfile = 'C:/Users/admin/Desktop/explore.csv'
data = pd.read_csv(datafile, encoding='utf-8')

# Transposed describe(): one row per column of `data`, one column per statistic.
explore = data.describe(percentiles=[], include='all').T
# describe() reports 'count' as the number of non-null entries, so the
# difference from the row count is the number of missing values.
explore['null'] = len(data) - explore['count']
explore = explore[['null', 'max', 'min']]
explore.columns = ['空数值', '最大值', '最小值']
explore.to_csv(resultfile)

# Chart text is Chinese, so select a font that can render it and use the
# ASCII hyphen for negative tick labels instead of the Unicode minus sign.
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.rcParams['axes.unicode_minus'] = False

# --- Histogram: number of members enrolled per year ---
ffp = data['FFP_DATE'].apply(lambda x: datetime.strptime(x, '%Y/%m/%d'))
ffp_year = ffp.map(lambda x: x.year)
fig = plt.figure(figsize=(8, 5))
plt.hist(ffp_year, bins='auto', color='#0504aa')
plt.xlabel('年份')
plt.ylabel('入会人数')
# The student ID in this title was missing a digit compared with every other
# chart title in the script; it now matches them.
plt.title('各年份会员入会人数(学号2020310143040)')
plt.show()
# close() must actually be called; a bare `plt.close` releases nothing.
plt.close(fig)

# --- Pie chart: gender split ---
# Count once and look both labels up in the same result.
gender_counts = data['GENDER'].value_counts()
male = gender_counts['男']
female = gender_counts['女']
fig = plt.figure(figsize=(7, 4))
plt.pie(
    [male, female],
    labels=['男', '女'],
    colors=['lightskyblue', 'lightcoral'],
    autopct='%1.1f%%',
)
plt.title('会员性别比例(学号2020310143040)')
plt.show()
plt.close(fig)

# --- Bar chart: members per membership tier (tiers 4, 5 and 6) ---
tier_counts = data['FFP_TIER'].value_counts()
lv_four = tier_counts.loc[4]
lv_five = tier_counts.loc[5]
lv_six = tier_counts.loc[6]
fig = plt.figure(figsize=(8, 5))
plt.bar(
    x=range(3),
    height=[lv_four, lv_five, lv_six],
    width=0.4,
    alpha=0.8,
    color='skyblue',
)
plt.xticks(range(3), ['4', '5', '6'])
plt.xlabel('会员等级')
plt.ylabel('会员人数')
plt.title('会员各级别人数(学号2020310143040)')
plt.show()
plt.close(fig)

# --- Box plot: member age (rows without an age are dropped first) ---
age = data['AGE'].dropna().astype('int64')
fig = plt.figure(figsize=(8, 5))
plt.boxplot(
    age,
    patch_artist=True,
    labels=['会员年龄'],
    boxprops={'facecolor': 'lightblue'},
)
plt.title('会员年龄分布箱型图(学号2020310143040)')
plt.grid(axis='y')
plt.show()
plt.close(fig)
def _draw_boxplot(values, tick_label, title, figsize):
    """Draw one gold-filled box plot of *values*, show it, then close it.

    Args:
        values: the data series to summarise.
        tick_label: label shown under the single box on the x axis.
        title: chart title.
        figsize: (width, height) of the figure in inches.
    """
    fig = plt.figure(figsize=figsize)
    plt.boxplot(
        values,
        patch_artist=True,
        labels=[tick_label],
        boxprops={'facecolor': 'gold'},
    )
    plt.title(title)
    # Horizontal grid lines only, to make the quartile levels easier to read.
    plt.grid(axis='y')
    plt.show()
    # close() must actually be called; a bare `plt.close` releases nothing.
    plt.close(fig)


# --- Flight information ---
lte = data['LAST_TO_END']
fc = data['FLIGHT_COUNT']
sks = data['SEG_KM_SUM']

# Box plot of the LAST_TO_END column.
_draw_boxplot(lte, '时长', '会员最后乘机至结束时长分布箱线图(学号2020310143040)', (5, 8))
# Box plot of the FLIGHT_COUNT column.
_draw_boxplot(fc, '飞行次数', '会员飞行次数分布箱线图(学号2020310143040)', (5, 8))
# Box plot of the SEG_KM_SUM column (taller canvas than the others).
_draw_boxplot(sks, '总飞行公里数', '客户总飞行公里数箱线图(学号2020310143040)', (5, 10))

# --- Points information ---
# Histogram of the EXCHANGE_COUNT column.
ec = data['EXCHANGE_COUNT']
fig = plt.figure(figsize=(8, 5))
plt.hist(ec, bins=5, color='#0504aa')
plt.xlabel('兑换次数')
plt.ylabel('会员人数')
plt.title('会员兑换积分次数分布直方图(学号2020310143040)')
plt.show()
plt.close(fig)

# Box plot of the Points_Sum column.
ps = data['Points_Sum']
_draw_boxplot(ps, '总累计积分', '客户总累计积分箱线图(学号2020310143040)', (5, 8))
import seaborn as sns

# Build the feature table for the correlation analysis. The explicit copy()
# makes it an independent frame, so the column assignments below do not
# write into a slice of `data` (which triggers SettingWithCopyWarning).
data_corr = data[['FFP_TIER', 'FLIGHT_COUNT', 'LAST_TO_END',
                  'SEG_KM_SUM', 'EXCHANGE_COUNT', 'Points_Sum']].copy()
# NOTE(review): missing ages are replaced by 0 and therefore enter the
# correlation as real values -- confirm this is the intended treatment.
age1 = data['AGE'].fillna(0)
data_corr['AGE'] = age1.astype('int64')
data_corr['ffp_year'] = ffp_year

# Pairwise Pearson correlation between all selected attributes.
dt_corr = data_corr.corr(method='pearson')
print('相关性矩阵为(学号2020310143040):\n', dt_corr)

# Heatmap of the correlation matrix, with each coefficient written in its cell.
fig, _ = plt.subplots(figsize=(10, 10))
sns.heatmap(dt_corr, annot=True, vmax=1, square=True, cmap='Blues')
plt.title('热力图(学号2020310143040)')
plt.show()
# close() must actually be called; a bare `plt.close` releases nothing.
plt.close(fig)
# Tags: plt, 学号, 航空公司, show, 客户, 2020310143040, close, 价值, data
# From: https://www.cnblogs.com/i3wood/p/17191780.html