单细胞转录组实战03: 使用celltypist注释细胞

标签：03 True res cluster 转录 key celltypist adata

上一步得到了质控和整合后的数据，这一步对其进行聚类分群和细胞类型注释。

from pathlib import Path
import re
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import celltypist

# Directory that receives the annotated AnnData written at the end of this
# step; parents are created as needed and an existing directory is reused.
OUTPUT_DIR = 'output/02.Cell_Type'
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

自定义函数

def flatten(nest_list):
    """Return a flat list holding the leaves of an arbitrarily nested list.

    Only ``list`` instances are descended into; every other value (strings
    and tuples included) is kept as a single element. Order is preserved.
    """
    flat = []
    for item in nest_list:
        if isinstance(item, list):
            # extend() appends in place, avoiding the repeated list copies
            # that sum(..., []) performs.
            flat.extend(flatten(item))
        else:
            flat.append(item)
    return flat
def label_helper(number_of_cluster: int):
    """Print a ``new_cluster_names`` template ready to be filled in by hand.

    One ``<id>,`` line is produced for every integer from 0 through
    ``number_of_cluster`` (inclusive), wrapped in a triple-quoted assignment.
    """
    cluster_ids = map(str, range(number_of_cluster + 1))
    body = ",\n".join(cluster_ids)
    print(f"\nnew_cluster_names ='''\n{body},\n'''\n")
def labeled(
    adata: "sc.AnnData",
    cluster_names: str,
    reference_key: str,
    cell_type_key: str = 'CellType',
    inplace: bool = True
    ):
    """Write a cell-type column into ``adata.obs`` from a cluster-to-name table.

    Args:
        adata: Annotated data object whose ``obs`` holds ``reference_key``.
        cluster_names: Header-less CSV text with one ``<cluster id>,<name>``
            row per line. A row with an empty name leaves the cells of that
            cluster as missing values.
        reference_key: ``obs`` column holding the cluster ids to look up.
        cell_type_key: ``obs`` column that receives the names.
        inplace: When true, modify ``adata`` itself and return ``None``;
            otherwise work on ``adata.copy()`` and return that copy.

    Raises:
        ValueError: If a cluster id occurs more than once in
            ``cluster_names``.
    """
    target = adata if inplace else adata.copy()
    # Everything is read as object so ids are compared as text.
    annot = pd.read_csv(StringIO(cluster_names), header=None, dtype='object')

    # A repeated id would make the left merge emit several rows per cell and
    # fail later with an opaque length-mismatch error; report it clearly here.
    repeated = annot.loc[annot[0].duplicated(), 0].unique().tolist()
    if repeated:
        raise ValueError(
            f"cluster_names lists these cluster ids more than once: {repeated}")

    reference = target.obs.loc[:, [reference_key]]
    merged = pd.merge(
        reference, annot, left_on=reference_key, right_on=0, how='left')
    # merge() resets the index, so assign raw values to keep obs row order.
    target.obs[cell_type_key] = merged[1].values
    return None if inplace else target

分为大群

先根据marker基因手动将细胞分为四个大群：上皮细胞（Epi）、内皮细胞（Endo）、成纤维细胞（Fib）和免疫细胞（Immune）。

# Load the AnnData object saved under output/01.preprocess.
# NOTE(review): presumably this is the QC'd and integrated result of the
# previous tutorial step - confirm against that step's output path.
adata = sc.read_h5ad('output/01.preprocess/adata.h5')

分辨率为1

# Leiden clustering at resolution 1. `res` stays a string because it is also
# used to build the obs column name ('leiden_1').
res = '1'
leiden_key = f'leiden_{res}'
sc.tl.leiden(
    adata,
    resolution=float(res),
    key_added=leiden_key,
    random_state=1314,
)
sc.pl.umap(adata, color=leiden_key, legend_loc='on data')

主要的marker点图

# Marker genes for the four coarse compartments. Every value is a list (the
# single immune marker included) so all groups share one shape.
major = {
    'Fib': ['MME', 'COL1A1', 'PDGFRA', 'COL1A2'],
    'Endo': ['PECAM1', 'RAMP2'],
    'Epi': ['EPCAM'],
    'Immune': ['PTPRC'],
}
# Dot plot of the markers per Leiden cluster.
sc.pl.dotplot(
    adata,
    var_names=major,
    groupby=f'leiden_{res}',
    dot_max=0.5,
    dot_min=0.1,
)

主要的marker UMAP图

# One UMAP panel per marker gene, in the order the groups appear in `major`.
marker_genes = flatten(list(major.values()))
sc.pl.umap(adata, color=marker_genes)

注释

# Print an empty template with ids 0 through 22, then fill it in by hand.
label_helper(22)
# Coarse cluster-to-compartment table in `<cluster id>,<name>` form. Only the
# Epi, Fib and Endo clusters are named here; ids with an empty name come out
# of `labeled` as missing values and are filled with 'Immune' afterwards.
new_cluster_names ='''
0,
1,
2,
3,
4,
5,Epi
6,
7,
8,
9,
10,
11,
12,Fib
13,
14,
15,Endo
16,
17,
18,
19,
20,
21,
22,Epi
'''

细胞大群UMAP图

# Attach the coarse labels as obs['CellTypeS1'].
labeled(
    adata,
    cluster_names=new_cluster_names,
    reference_key=f'leiden_{res}',
    cell_type_key='CellTypeS1',
)
# Clusters left unnamed in the table are treated as immune cells. Assign the
# result back explicitly: fillna(inplace=True) on `adata.obs.CellTypeS1` acts
# on a temporary Series and, with pandas Copy-on-Write, never reaches
# adata.obs.
adata.obs['CellTypeS1'] = adata.obs['CellTypeS1'].fillna('Immune')
sc.pl.umap(adata, color='CellTypeS1')

celltypist注释

下载celltypist的模型

# Fetch the celltypist models so the `.pkl` names used below can be loaded.
# NOTE(review): called without arguments this appears to download every
# available model; only three are used in this file - check the celltypist
# docs for a way to restrict the download.
celltypist.models.download_models()

预测免疫大群

# Annotate with the 'Immune_All_High.pkl' model, then draw a dot plot with
# the Leiden clusters as reference and the predicted labels as prediction.
_, ax = plt.subplots(figsize=(5, 5))
predictions = celltypist.annotate(adata, model='Immune_All_High.pkl')
dp = celltypist.dotplot(
    predictions,
    use_as_reference=f'leiden_{res}',
    use_as_prediction='predicted_labels',
    filter_prediction=0.1,
    return_fig=True,
    ax=ax,
)
dp.style(grid=True, cmap='RdBu_r').show()

预测详细的免疫细胞

# Same workflow with the 'Immune_All_Low.pkl' model: annotate, then dot-plot
# predicted labels against the Leiden clusters.
_, ax = plt.subplots(figsize=(5, 5))
predictions = celltypist.annotate(adata, model='Immune_All_Low.pkl')
dp = celltypist.dotplot(
    predictions,
    use_as_reference=f'leiden_{res}',
    use_as_prediction='predicted_labels',
    filter_prediction=0.2,
    return_fig=True,
    ax=ax,
)
dp.style(grid=True, cmap='RdBu_r').show()

Human_Lung_Atlas预测

# Same workflow with the 'Human_Lung_Atlas.pkl' model: annotate, then
# dot-plot predicted labels against the Leiden clusters.
_, ax = plt.subplots(figsize=(5, 5))
predictions = celltypist.annotate(adata, model='Human_Lung_Atlas.pkl')
dp = celltypist.dotplot(
    predictions,
    use_as_reference=f'leiden_{res}',
    use_as_prediction='predicted_labels',
    filter_prediction=0.2,
    return_fig=True,
    ax=ax,
)
dp.style(grid=True, cmap='RdBu_r').show()

看一下Treg细胞的marker基因（TNFRSF9、FOXP3）主要在哪些细胞群中表达

# Treg: colour the UMAP by the two genes used here to locate Treg cells.
treg_markers = ['TNFRSF9', 'FOXP3']
sc.pl.umap(adata, color=treg_markers)

注释细胞

# Final cluster-to-cell-type table in `<cluster id>,<name>` form; every
# Leiden cluster id from 0 through 22 is given a name.
new_cluster_names ='''
0,CD4+ T
1,CD4+ T
2,CD8+ T
3,CD4+ T
4,Mac
5,Epi
6,CD4+ T
7,Treg
8,CD8+ T
9,DC
10,CD8+ T
11,Mast
12,Fib
13,B
14,Treg
15,Endo
16,T
17,DC
18,CD4+ T
19,Plasma
20,DC
21,pDC
22,Epi
'''

细胞分群UMAP图

# Attach the detailed labels as obs['CellTypeS2'] and draw them on the UMAP.
labeled(
    adata,
    cluster_names=new_cluster_names,
    reference_key=f'leiden_{res}',
    cell_type_key='CellTypeS2',
)
sc.pl.umap(adata, color='CellTypeS2', legend_loc='on data')

写出数据

# Save the annotated object into the output directory with LZF compression.
adata.write_h5ad(f'{OUTPUT_DIR}/adata.h5', compression='lzf')

以上注释方式仅供参考

标签：03,True,res,cluster,转录,key,celltypist,adata
From： https://www.cnblogs.com/BioQuest/p/17110316.html

单细胞转录组实战03: 使用celltypist注释细胞

自定义函数

分为大群

celltypist注释

相关文章

赞助商

阅读排行