首页 > 其他分享 >Bk5_Ch18_01

Bk5_Ch18_01

时间:2024-10-12 17:13:24浏览次数:9  
标签:given 01 joint Ch18 Bk5 set ax x1 True

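代码中 “prior probability” 一节的这一行:

y_counts = y_df.value_counts()

需要改成:

y_counts = y_df.value_counts('label')

原因:`DataFrame.value_counts()` 不带参数时,返回的 Series 以 MultiIndex(元组)作为索引;后面用 `y_prob['C_1']` 按类别标签取值,在部分 pandas 版本下可能得不到预期的标量。指定列名 'label' 之后,索引就是普通的类别标签。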

###############

Authored by Weisheng Jiang

Book 5 | From Basic Arithmetic to Machine Learning

Published and copyrighted by Tsinghua University Press

Beijing, China, 2022

###############

import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt 
import pandas as pd  
from sklearn.datasets import load_iris

# Close any figures left over from a previous run.
plt.close('all')

# Iris data set as bundled with scikit-learn.
iris = load_iris()

X_1_to_4 = iris.data
y = iris.target  # class codes; the script compares them against 0, 1 and 2

feature_names = ['Sepal length, $X_1$','Sepal width, $X_2$',
                 'Petal length, $X_3$','Petal width, $X_4$']

X_df = pd.DataFrame(X_1_to_4, columns=feature_names)

# Build the string labels with an explicit code -> name map.  The previous
# approach created an integer column and then overwrote it with strings via
# boolean masks, which depends on implicit dtype upcasting (deprecated in
# pandas).
class_names = {0: 'C_1', 1: 'C_2', 2: 'C_3'}
y_df = pd.DataFrame({'label': pd.Series(y).map(class_names)})

# Only the first feature is used by the rest of the script.
X1_df = X_df['Sepal length, $X_1$']

#%% likelihood PDF, given class Y

# Grid of sepal-length values on which every density is evaluated.
x1 = np.linspace(4,8,161)

# One row per class: (value of y selecting the class, colour, legend text).
likelihood_specs = (
    (0, '#FF3300', '$f_{X1|Y}(x_1|C_1)$, likelihood'),
    (1, '#0099FF', '$f_{X1|Y}(x_1|C_2)$, likelihood'),
    (2, '#8A8A8A', '$f_{X1|Y}(x_1|C_3)$, likelihood'),
)

fitted_kdes = []
likelihood_curves = []

for class_code, tint, legend_text in likelihood_specs:

    fig, ax = plt.subplots()

    # Kernel density estimate of sepal length for the samples of this class.
    kde = sm.nonparametric.KDEUnivariate(X1_df[y==class_code])
    kde.fit(bw=0.1)
    pdf_on_grid = kde.evaluate(x1)

    fitted_kdes.append(kde)
    likelihood_curves.append(pdf_on_grid)

    ax.fill_between(x1, pdf_on_grid, facecolor=tint, alpha=0.2)
    ax.plot(x1, pdf_on_grid, color=tint, label=legend_text)

    for axis_name in ('x', 'y'):
        ax.autoscale(enable=True, axis=axis_name, tight=True)
    ax.set_ylim(0, 1.5)
    ax.set_yticks([0, 0.5, 1, 1.5])
    ax.set_xlim(4, 8)
    ax.set_ylabel('PDF')
    ax.set_xlabel('Sepal length, $x_1$')
    ax.legend()

# Keep the per-class names that the later cells refer to.
KDE_C1, KDE_C2, KDE_C3 = fitted_kdes
f_x1_given_C1, f_x1_given_C2, f_x1_given_C3 = likelihood_curves

#%% compare three likelihood curves

# Overlay the three class-conditional densities on a single set of axes.
fig, ax = plt.subplots()

likelihood_overlay = zip(
    (f_x1_given_C1, f_x1_given_C2, f_x1_given_C3),
    ('#FF3300', '#0099FF', '#8A8A8A'),
    ('$f_{X1|Y}(x_1|C_1)$', '$f_{X1|Y}(x_1|C_2)$', '$f_{X1|Y}(x_1|C_3)$'),
)

for curve, tint, legend_text in likelihood_overlay:
    ax.fill_between(x1, curve, facecolor=tint, alpha=0.2)
    ax.plot(x1, curve, color=tint, label=legend_text)

for axis_name in ('x', 'y'):
    ax.autoscale(enable=True, axis=axis_name, tight=True)
ax.set_ylim(0, 1.5)
ax.set_yticks([0, 0.5, 1, 1.5])
ax.set_xlim(4, 8)
ax.set_ylabel('Likelihood PDF')
ax.set_xlabel('Sepal length, $x_1$')
ax.legend()

#%% prior probability

# Count the samples of each class.  Counting the 'label' column as a Series
# gives a plain label index (DataFrame.value_counts() yields a MultiIndex),
# so the later lookups y_prob['C_1'] etc. address the class directly.
# sort_index() pins the order to C_1, C_2, C_3 so the bars line up with
# my_colors even though the counts are tied.
y_counts = y_df['label'].value_counts().sort_index()

# Bar colours, in class order.
my_colors = ['#FF3300', '#0099FF', '#8A8A8A']

fig, ax = plt.subplots()

y_counts.plot.bar(ax=ax, color=my_colors)
# Label the axis before showing; previously this ran after plt.show() and
# therefore did not label the count chart.
ax.set_ylabel('Count')

plt.show()

# Prior = class count / number of entries in the (single) label column.
y_prob = y_counts/y_df.count().values[0]

fig, ax = plt.subplots()

y_prob.plot.bar(ax=ax, color=my_colors)
ax.set_ylabel('Prior probability')

#%% Joint PDF

# Joint density = class-conditional likelihood scaled by the class prior.
f_x1_joint_C1 = f_x1_given_C1*y_prob['C_1']
f_x1_joint_C2 = f_x1_given_C2*y_prob['C_2']
f_x1_joint_C3 = f_x1_given_C3*y_prob['C_3']

# One row per class:
# (likelihood curve, joint curve, colour, likelihood legend, joint legend)
joint_panels = (
    (f_x1_given_C1, f_x1_joint_C1, '#FF3300',
     '$f_{X1|Y}(x_1|C_1)$, likelihood', '$f_{X1,Y}(x_1,C_1)$, joint'),
    (f_x1_given_C2, f_x1_joint_C2, '#0099FF',
     '$f_{X1|Y}(x_1|C_2)$, likelihood', '$f_{X1,Y}(x_1,C_2)$, joint'),
    (f_x1_given_C3, f_x1_joint_C3, '#8A8A8A',
     '$f_{X1|Y}(x_1|C_3)$, likelihood', '$f_{X1,Y}(x_1,C_3)$, joint'),
)

for likelihood_curve, joint_curve, tint, likelihood_text, joint_text in joint_panels:

    fig, ax = plt.subplots()

    # Likelihood as a dashed reference line
    ax.plot(x1, likelihood_curve, color=tint, linestyle='--',
            label=likelihood_text)

    # Joint density, shaded
    ax.fill_between(x1, joint_curve, facecolor=tint, alpha=0.2)
    ax.plot(x1, joint_curve, color=tint, label=joint_text)

    for axis_name in ('x', 'y'):
        ax.autoscale(enable=True, axis=axis_name, tight=True)
    ax.set_ylim(0, 1.5)
    ax.set_yticks([0, 0.5, 1, 1.5])
    ax.set_xlim(4, 8)
    ax.set_ylabel('$PDF$')
    ax.set_xlabel('Sepal length, $x_1$')
    ax.legend()

# compare joint

# Overlay the three joint densities f(x1, C_k) on a single set of axes.
fig, ax = plt.subplots()

joint_overlay = zip(
    (f_x1_joint_C1, f_x1_joint_C2, f_x1_joint_C3),
    ('#FF3300', '#0099FF', '#8A8A8A'),
    ('$f_{X1,Y}(x_1,C_1)$', '$f_{X1,Y}(x_1,C_2)$', '$f_{X1,Y}(x_1,C_3)$'),
)

for joint_curve, tint, legend_text in joint_overlay:
    ax.fill_between(x1, joint_curve, facecolor = tint, alpha = 0.2)
    ax.plot(x1, joint_curve, color = tint, label = legend_text)

ax.autoscale(enable=True, axis='x', tight=True)
ax.autoscale(enable=True, axis='y', tight=True)
ax.set_ylim([0,1.5])
ax.set_yticks([0, 0.5, 1, 1.5])
ax.set_xlim([4,8])
# The curves drawn here are joint densities; the axis was previously
# mislabelled 'Conditional PDF'.
ax.set_ylabel('Joint PDF')
ax.set_xlabel('Sepal length, $x_1$')
ax.legend()

#%% Evidence fX_1(x_1)

# Evidence (marginal of x1): sum of the three joint densities.
f_x1 = f_x1_joint_C1 + f_x1_joint_C2 + f_x1_joint_C3

fig, ax = plt.subplots()

# Marginal curve with a light fill underneath
evidence_tint = '#00448A'
ax.plot(x1, f_x1, color=evidence_tint,
        label='$f_{X1}(x_1)$, evidence (marginal)')
ax.fill_between(x1, f_x1, facecolor=evidence_tint, alpha=0.1)

# The joint densities that add up to the marginal
joint_lines = (
    (f_x1_joint_C1, '#FF3300', '$f_{X1,Y}(x_1,C_1)$'),
    (f_x1_joint_C2, '#0099FF', '$f_{X1,Y}(x_1,C_2)$'),
    (f_x1_joint_C3, '#8A8A8A', '$f_{X1,Y}(x_1,C_3)$'),
)
for joint_curve, tint, legend_text in joint_lines:
    ax.plot(x1, joint_curve, color=tint, label=legend_text)

for axis_name in ('x', 'y'):
    ax.autoscale(enable=True, axis=axis_name, tight=True)
ax.set_ylim(0, 0.75)
ax.set_yticks([0, 0.5])
ax.set_xlim(4, 8)
ax.set_ylabel('PDF')
ax.set_xlabel('Sepal length, $x_1$')
ax.legend()


#%%

#%% Posterior

# Bayes' rule on the grid: posterior = joint / evidence.
f_C1_given_x1 = f_x1_joint_C1/f_x1
f_C2_given_x1 = f_x1_joint_C2/f_x1
f_C3_given_x1 = f_x1_joint_C3/f_x1

# One row per class:
# (joint curve, posterior curve, colour, joint legend, posterior legend).
# The joint legends use the subscript X1 (previously written 'x1') so they
# match the notation of every other figure in this script.
posterior_panels = (
    (f_x1_joint_C1, f_C1_given_x1, '#FF3300',
     '$f_{X1,Y}(x_1,C_1)$, joint', '$f_{Y|X1}(C_1|x_1)$, posterior'),
    (f_x1_joint_C2, f_C2_given_x1, '#0099FF',
     '$f_{X1,Y}(x_1,C_2)$, joint', '$f_{Y|X1}(C_2|x_1)$, posterior'),
    (f_x1_joint_C3, f_C3_given_x1, '#8A8A8A',
     '$f_{X1,Y}(x_1,C_3)$, joint', '$f_{Y|X1}(C_3|x_1)$, posterior'),
)

for joint_curve, posterior_curve, tint, joint_text, posterior_text in posterior_panels:

    # Three stacked panels: joint (top), marginal (middle), posterior (bottom)
    fig, (ax1, ax2, ax3) = plt.subplots(3)

    # joint density of x1 and this class
    ax1.plot(x1, joint_curve, color = tint, label = joint_text)
    ax1.fill_between(x1, joint_curve, facecolor = tint, alpha = 0.2)

    # marginal, x1
    ax2.plot(x1, f_x1, color = '#00448A',
             label = '$f_{X1}(x_1)$, marginal')
    ax2.fill_between(x1, f_x1, facecolor = '#00448A', alpha = 0.1)

    # The two upper panels share the same formatting and hide their x ticks.
    for upper_ax in (ax1, ax2):
        upper_ax.set_ylim([0,1])
        upper_ax.set_yticks([0, 0.5, 1])
        upper_ax.set_xlim([4,8])
        upper_ax.set_xticks([])
        upper_ax.legend()

    # given x1, probability of this class
    ax3.fill_between(x1, posterior_curve, facecolor = tint, alpha = 0.2)
    ax3.plot(x1, posterior_curve, color = tint, label = posterior_text)

    ax3.autoscale(enable=True, axis='x', tight=True)
    ax3.autoscale(enable=True, axis='y', tight=True)
    ax3.set_ylim([0,1])
    ax3.set_yticks([0, 0.5, 1])
    ax3.set_xlim([4,8])
    ax3.set_xlabel('Sepal length, $x_1$')
    ax3.legend()

#%% compare three Posterior curves

# Overlay the three posterior curves on a single set of axes.
fig, ax = plt.subplots()

posterior_overlay = zip(
    (f_C1_given_x1, f_C2_given_x1, f_C3_given_x1),
    ('#FF3300', '#0099FF', '#8A8A8A'),
    ('$f_{Y|X1}(C_1|x_1)$', '$f_{Y|X1}(C_2|x_1)$', '$f_{Y|X1}(C_3|x_1)$'),
)

for posterior_curve, tint, legend_text in posterior_overlay:
    ax.fill_between(x1, posterior_curve, facecolor=tint, alpha=0.2)
    ax.plot(x1, posterior_curve, color=tint, label=legend_text)

# Dashed reference line at probability 1
ax.axhline(y=1, color='k', linestyle='--')

for axis_name in ('x', 'y'):
    ax.autoscale(enable=True, axis=axis_name, tight=True)
ax.set_ylim(0, 1)
ax.set_yticks([0, 0.5, 1])
ax.set_xlim(4, 8)
ax.set_ylabel('Posterior probability')
ax.set_xlabel('Sepal length, $x_1$')
ax.legend()

#%%

#%% compare posterior, likelihood, marginal (evidence), and joint

# One figure per class, each drawing four curves on the same axes.
# Row layout: (posterior, likelihood, joint,
#              posterior legend, likelihood legend, joint legend)
summary_panels = (
    (f_C1_given_x1, f_x1_given_C1, f_x1_joint_C1,
     '$f_{Y|X1}(C_1|x_1)$, posterior',
     '$f_{X1|Y}(x_1|C_1)$, likelihood',
     '$f_{X1,Y}(x_1,C_1)$, joint'),
    (f_C2_given_x1, f_x1_given_C2, f_x1_joint_C2,
     '$f_{Y|X1}(C_2|x_1)$, posterior',
     '$f_{X1|Y}(x_1|C_2)$, likelihood',
     '$f_{X1,Y}(x_1,C_2)$, joint'),
    (f_C3_given_x1, f_x1_given_C3, f_x1_joint_C3,
     '$f_{Y|X1}(C_3|x_1)$, posterior',
     '$f_{X1|Y}(x_1|C_3)$, likelihood',
     '$f_{X1,Y}(x_1,C_3)$, joint'),
)

for (posterior_curve, likelihood_curve, joint_curve,
     posterior_text, likelihood_text, joint_text) in summary_panels:

    fig, ax = plt.subplots()

    # posterior
    ax.plot(x1, posterior_curve, color='r', label=posterior_text)

    # likelihood, with a translucent fill
    ax.plot(x1, likelihood_curve, color='#0099FF', label=likelihood_text)
    ax.fill_between(x1, likelihood_curve, alpha=0.2, color='#0099FF')

    # marginal (evidence)
    ax.plot(x1, f_x1, color='#00448A',
            label='$f_{X1}(x_1)$, evidence (marginal)')

    # joint, marked with a hatched (unfilled) area
    ax.plot(x1, joint_curve, color='#92D050', label=joint_text)
    ax.fill_between(x1, joint_curve,
                    edgecolor='k',
                    hatch='///',
                    facecolor="none")

    ax.set_ylim(0, 1.5)
    ax.set_yticks([0, 0.5, 1, 1.5])
    ax.set_xlim(4, 8)

    ax.set_xlabel('Sepal length, $x_1$')
    ax.legend()

标签:given,01,joint,Ch18,Bk5,set,ax,x1,True
From: https://www.cnblogs.com/redufa/p/18460921

相关文章

  • 『板刷 AGC』[AGC017] A~E 做题记录
    这场打得更菜了,只会A,B,D,没办法,人机是这样的,我还是太菜了。A:Biscuits人机计数题。一个直接的思路是把\(a\)的所有数对\(2\)取模,然后选出\(m\)个\(a_i=1\)的\(i\)满足\(m\bmod2=p\),而剩下的\(a_i=0\)的\(i\)就是可选可不选。设\(s=\sum_{i=1}^n[a_i\bmod2=......
  • 01-函数、极限、连续性、导数
    为了加深在人工智能、深度学习领域的学习,接下来会推出数学基础系列博客,加深自己在这领域的基础知识。一、函数1、函数的定义函数表示量与量之间的关系如:A=πr2A=πr2。更普遍的是用y=f(x)y=f(x)表示,其中x表示自变量,y表示因变量。函数在x0处取得的函数值y0=y∣x=x0=f(x0)y0=y∣......
  • 2013年国赛高教杯数学建模A题车道被占用对城市道路通行能力的影响解题全过程文档及程
    2013年国赛高教杯数学建模A题车道被占用对城市道路通行能力的影响  车道被占用是指因交通事故、路边停车、占道施工等因素,导致车道或道路横断面通行能力在单位时间内降低的现象。由于城市道路具有交通流密度大、连续性强等特点,一条车道被占用,也可能降低路段所有车道的......
  • 2011-2022年各省金融监管水平数据(含原始数据+计算过程+计算代码)
    2011-2022年各省金融监管水平数据(含原始数据+计算过程+计算代码)1、时间:2011-2022年2、来源:国家统计局、统计年鉴3、指标:金融业增加值、金融监管支出、金融监管水平4、计算方法:金融监管水平=金融监管支出/金融业增加值5、指标解释:金融监管水平是指政府及其指定机构通过法......
  • E65 树形DP P3237 [HNOI2014] 米特运输
    视频链接:E65树形DPP3237[HNOI2014]米特运输_哔哩哔哩_bilibili  P3237[HNOI2014]米特运输-洛谷|计算机科学教育新生态(luogu.com.cn)//树形DPO(n)#include<bits/stdc++.h>#defineintlonglongusingnamespacestd;constintN=500005,mod=1e9+7;......
  • 20241010 模拟赛
    想看题的戳这里A.植物收集难度:绿先讲一下\(O(n^3)\)的暴力:枚举一下要用多少个\(k\)。将价格排序,假设要用\(x\)个\(k\),则每个数会对其右边\(x\)个数产生贡献,按价格从小到大计算贡献。优化一下,每次增加一个\(k\),则每株植物最多往右边贡献\(1\)个,所以每次往右边枚举......
  • DLJD_Docker学习_01
    第1章Docker概述1.1课程引入开发/运维互掐1.1.1开发与测试和运维间的矛盾,主要是由于环境的不同而引发的。如果能将开发人员使用的环境交给测试与运维使用,这些问题就都能解决。1.1.2DevOpsDevOps是一种思想,是一种管理模式,是一种执行规范与标准。它主要是用于促进开发、......
  • VS2019/2022配置C++ OpenCV4.10.0环境
    一、下载opencv4.10.0官网链接:https://opencv.org/ 安装的时候记住安装路径,本人安装到E盘 二、新建C++项目1、本人新建C++/CLR.Netframework项目 2、右击打开C++项目属性2.1、添加包含目录 此处本人配置的是绝对地址,拷贝build文件夹到程序目录,然后配置相对地......
  • SS241012B. 电梯(lift)
    SS241012B.电梯(lift)题意你有\(n\)种货物,每种货物有一个高度\(f\)和体积\(w\)。其中\(w\)表示体积是\(2^w\)。你有一个大小为\(2^m\)的背包,一个背包的花费是背包物品的最大高度,问使用若干个背包装完物品的最小代价。思路膜拜黄队%%%感觉黄队的做法比题解好。首先一......
  • 20241010
    表格游戏我们看到这么小的数据范围,可以想到暴搜,但是时间复杂度来到了\(2^{30}\),考虑折半搜索,那么其实看起来是\(2^{22}\times15\)的,但是实际测评中跑不满,所以可以\(AC\)AdjustThePresentation(EasyVersion)根据题意,他如果给一个人看过了幻灯片,那么这个人可......