作者,Evil Genius
HD数据不同于Xenium,目前还是横平竖直的一刀切数据分析模式,但是真实的细胞绝对不是如此分布的。因此在实际分析中,利用2um的精度配合图像信息来获取真实的细胞分布数据,就成了分析的必需。
多说一句,分析的准确性和超前化也是公司对核心分析人员的核心要求。
如下图,我们最好不要采用8um、16um这种一刀切的模式,而是识别每个2um的bin中是否含有细胞,如果有则保留,没有则舍弃。
这个时候拿到的数据才是有效数据,所做的分析才可以认为是非常可靠的。利用2um的精度重构细胞分布信息。
在Visium HD的最高2µm分辨率下,将亚细胞bin连接成单个细胞。这可以通过使用StarDist进行形态分割来完成:使用其预训练的H&E模型识别细胞核,随后将其扩展到邻近的未标记的bin中。
我们需要实现如下的分析目标
分析得到的结果流程图
分析得到的结果
局部放大
下面我们来实现,使用的是官方示例数据
import scanpy as sc
import os
import bin2cell as b2c
import celltypist
from celltypist import models
import numpy as np
from matplotlib import rcParams
from matplotlib import font_manager
import matplotlib.pyplot as plt
# --- Plot configuration and loading of the 8 um Visium HD bins ---------------
# Use font type 42 (TrueType) for text in saved PDF figures.
rcParams['pdf.fonttype'] = 42
# Register the Arial font file with matplotlib, print the file matplotlib
# resolves "Arial" to, and make Arial the sans-serif face.
font_manager.fontManager.addfont(".../software/Arial.ttf")
print(font_manager.findfont("Arial"))
plt.rcParams["font.sans-serif"] = ["Arial"]
# Configure scanpy figures once. An earlier, otherwise identical call with
# dpi_save=150 was removed: this call re-sets every one of its parameters, so
# the earlier call never had any lasting effect.
sc.settings.set_figure_params(dpi = 150, color_map = 'RdPu', dpi_save = 300, vector_friendly = True, format = 'pdf')
# Input locations: 8 um and 2 um bin folders plus the source tissue image.
path008 = ".../visium_hd/gut_public/square_008um/"
path002 = ".../visium_hd/gut_public/square_002um/"
source_image_path = ".../10X_datasets/human_CRC/Visium_HD_Human_Colon_Cancer_tissue_image.btf"
# Read the 8 um bins together with the source image via bin2cell.
bdata = b2c.read_visium(path008, source_image_path = source_image_path)
bdata.var_names_make_unique()
# Keep a copy of the object as loaded (before any filtering) in .raw.
bdata.raw = bdata.copy()
基础处理
# Basic filtering and normalisation of the 8 um bin object (bdata).
# Gene filter first (min_cells=3), then observation filter (min_genes=100).
sc.pp.filter_genes(bdata, min_cells=3)
sc.pp.filter_cells(bdata, min_genes=100)
# Compute QC metrics and store them on the object (inplace=True); the violin
# plot further down reads these columns.
sc.pp.calculate_qc_metrics(bdata,inplace=True)
# Flag the top 5000 highly variable genes. This is deliberately run before
# normalisation.
# NOTE(review): the seurat_v3 flavour is documented to expect count data —
# confirm X still holds raw counts at this point.
sc.pp.highly_variable_genes(bdata,n_top_genes=5000,flavor="seurat_v3")
# Normalise every observation to a total of 1e4, then apply log1p.
sc.pp.normalize_total(bdata,target_sum=1e4)
sc.pp.log1p(bdata)
Predict cell types from 2 models and combine them
# Annotate the 8 um bins with two CellTypist models (no majority voting) and
# merge the two label sets into one 'predicted_labels' column.
predictions_8bin_mega = celltypist.annotate(
    bdata,
    model='/nfs/team205/rb29/VisiumHD_intestine/Src/celltypist_models/model_from_megaGut_colon_CRC_level3.pkl',
    majority_voting=False,
)
predictions_8bin_crc = celltypist.annotate(
    bdata,
    model='Human_Colorectal_Cancer.pkl',
    majority_voting=False,
)

# For each model (healthy gut first, then colorectal cancer) materialise the
# result as an AnnData and stash its generic 'predicted_labels' / 'conf_score'
# columns under model-specific names.
# NOTE(review): the healthy columns must survive the second to_adata() call —
# this presumably relies on both results sharing the same AnnData; confirm.
_model_columns = (
    (predictions_8bin_mega, 'predicted_labels_healthy', 'conf_score_healthy'),
    (predictions_8bin_crc, 'predicted_labels_crc', 'conf_score_crc'),
)
for _result, _label_col, _score_col in _model_columns:
    bdata = _result.to_adata()
    bdata.obs[_label_col] = bdata.obs['predicted_labels']
    bdata.obs[_score_col] = bdata.obs['conf_score']

# Remove the generic columns now that both models have their own copies.
bdata.obs.drop(columns=['predicted_labels', 'conf_score'], inplace=True)

# A bin takes the CRC label only when the CRC model is strictly more confident
# than the healthy model AND the CRC label is not 'Unknown'.
crc_is_unknown = bdata.obs['predicted_labels_crc'] == 'Unknown'
crc_more_confident = bdata.obs['conf_score_healthy'] < bdata.obs['conf_score_crc']
bdata.obs['higher_in_crc'] = crc_more_confident & ~crc_is_unknown
bdata.obs['higher_in_crc'].value_counts()

# Unified annotation: start from the healthy labels, substitute the CRC label
# where 'higher_in_crc' is set, and store the result as a categorical.
crc_wins = bdata.obs['higher_in_crc']
unified_labels = bdata.obs['predicted_labels_healthy'].astype('object')
unified_labels = unified_labels.where(
    ~crc_wins,
    bdata.obs['predicted_labels_crc'].astype('object'),
)
bdata.obs['predicted_labels'] = unified_labels.astype('category')
bdata.obs['predicted_labels']
# Keep only the genes flagged as highly variable, then scale, run PCA, build
# the neighbour graph and compute the UMAP embedding.
hvg_mask = bdata.var["highly_variable"]
bdata = bdata[:, hvg_mask].copy()
sc.pp.scale(bdata, max_value=10)
sc.pp.pca(bdata, use_highly_variable=True)
sc.pp.neighbors(bdata)
sc.tl.umap(bdata)

# Violin plots of the QC metrics, drawn at a lower figure dpi.
sc.set_figure_params(dpi=50, fontsize=10)
qc_metric_keys = [
    'n_genes_by_counts',
    'log1p_n_genes_by_counts',
    'total_counts',
    'log1p_total_counts',
    'pct_counts_in_top_50_genes',
    'pct_counts_in_top_100_genes',
]
sc.pl.violin(bdata, qc_metric_keys, jitter=0.1, multi_panel=True)
estimate embedding
# Cluster the 8 um bins with Leiden and show the clusters on the UMAP.
sc.set_figure_params(dpi=100,fontsize=10,)
# Switch the working directory to the dataset folder.
# NOTE(review): presumably so that saved figures/outputs land there — confirm.
os.chdir('.../visium_hd/gut_public/')
# Leiden clustering with resolution=6; labels are stored in obs['leiden'].
sc.tl.leiden(bdata,resolution=6,key_added='leiden')
sc.pl.umap(bdata,color=['leiden'],size=2,wspace=0.25,frameon=False)
# Disabled export of the .raw copy of the 8 um object to h5ad:
####bdata.raw.to_adata().write('.../visium_hd/gut_public/crc_8um.h5ad')
结合图像和数据进行分析
# Load the saved 'b2c_crc.h5ad' object from the 2 um folder and preprocess it.
# NOTE(review): presumably the per-cell output of bin2cell — confirm.
cdata = sc.read_h5ad(path002+'b2c_crc.h5ad')
cdata.var_names_make_unique()
# Keep only observations built from at least 6 bins. The .copy() is required:
# without it cdata is an AnnData *view*, whose .X is re-derived from the parent
# on every access, so the in-place rounding below would be applied to a
# temporary matrix and silently discarded.
cdata = cdata[cdata.obs['bin_count']>5].copy() # min 6 bins
# need integers for seurat_v3 HVGs
# (assumes X is a scipy sparse matrix exposing .data — TODO confirm)
cdata.X.data = np.round(cdata.X.data)
# Keep a copy of the rounded, bin-count-filtered object in .raw.
cdata.raw = cdata.copy()
# Same filtering thresholds as used for the 8 um bins.
sc.pp.filter_genes(cdata, min_cells=3)
sc.pp.filter_cells(cdata,min_genes=100)
sc.pp.calculate_qc_metrics(cdata,inplace=True)
# HVG selection runs on the rounded counts, before normalisation.
sc.pp.highly_variable_genes(cdata,n_top_genes=5000,flavor="seurat_v3")
sc.pp.normalize_total(cdata,target_sum=1e4)
标签:pp,图像识别,bdata,genes,---,cdata,sc,obs,HD
From: https://blog.csdn.net/weixin_53637133/article/details/140149084