上一步得到了质控和整合后的数据,这一步对其进行聚类分群和细胞注释。
from pathlib import Path
import re
from io import StringIO
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import celltypist
# Directory that receives the files written by this step; created up front
# (including missing parents) so later writes cannot fail on a missing folder.
OUTPUT_DIR = 'output/02.Cell_Type'
Path(OUTPUT_DIR).mkdir(exist_ok=True, parents=True)
自定义函数
def flatten(nest_list):
    """Return a flat list holding the items of an arbitrarily nested list.

    Only ``list`` instances are descended into; any other value (strings and
    tuples included) is kept as a single item, in its original position.
    """
    flat = []
    for item in nest_list:
        if isinstance(item, list):
            # extend() keeps the work linear; repeated list concatenation
            # would copy the accumulated result on every step.
            flat.extend(flatten(item))
        else:
            flat.append(item)
    return flat
def label_helper(number_of_cluster: int):
    """Print a blank annotation template to copy, fill in and pass to ``labeled``.

    The template holds one ``<cluster id>,`` line for every id from 0 up to
    and including ``number_of_cluster``. The argument is therefore the
    highest cluster id, not the cluster count: ``label_helper(22)`` prints
    23 lines.
    """
    # Each row carries its own trailing comma, so an empty range produces an
    # empty template instead of a lone "," line.
    rows = "".join(f"{cluster_id},\n" for cluster_id in range(number_of_cluster + 1))
    print("\nnew_cluster_names ='''\n" + rows + "'''\n")
def labeled(
    adata: "sc.AnnData",
    cluster_names: str,
    reference_key: str,
    cell_type_key: str = 'CellType',
    inplace: bool = True
):
    """Write a cell-type column to ``adata.obs`` from a cluster-to-label table.

    Args:
        adata: Annotated data whose ``obs`` contains ``reference_key``.
        cluster_names: Header-less CSV text with one ``cluster_id,label`` pair
            per line (the format printed by ``label_helper``). Whitespace
            around ids and labels is ignored; a line without a label leaves
            that cluster unlabelled (NaN).
        reference_key: ``obs`` column holding the cluster id of every cell.
        cell_type_key: ``obs`` column that receives the labels.
        inplace: Modify ``adata`` itself when true, otherwise work on a copy.

    Returns:
        ``None`` when ``inplace`` is true, otherwise the annotated copy.

    Raises:
        ValueError: If the same cluster id is listed more than once.
    """
    _adata = adata if inplace else adata.copy()

    if cluster_names.strip():
        _annot_df = pd.read_csv(StringIO(cluster_names), header=None, dtype='object')
    else:
        # Nothing to parse: every cell stays unlabelled.
        _annot_df = pd.DataFrame(columns=[0, 1], dtype='object')
    # Guarantee the label column exists even when no line contained a comma.
    _annot_df = _annot_df.reindex(columns=[0, 1])

    mapping = {}
    seen = set()
    for cluster_id, label in zip(_annot_df[0], _annot_df[1]):
        if isinstance(cluster_id, str):
            cluster_id = cluster_id.strip()
        if cluster_id in seen:
            raise ValueError(
                f"cluster {cluster_id!r} is listed more than once in cluster_names"
            )
        seen.add(cluster_id)
        # Missing or blank labels are left out so that map() yields NaN for them.
        if isinstance(label, str) and label.strip():
            mapping[cluster_id] = label.strip()

    # Compare ids as text so categorical, string and integer cluster columns
    # all match the parsed table; map() keeps the obs index, so labels stay
    # attached to the right cells.
    _adata.obs[cell_type_key] = _adata.obs[reference_key].astype(str).map(mapping)
    return None if inplace else _adata
分为大群
先根据经典marker基因手动将细胞分为四个大群:上皮细胞(Epi)、内皮细胞(Endo)、成纤维细胞(Fib)和免疫细胞(Immune)。
# Load the AnnData object stored under the preprocessing step's output folder.
preprocessed_h5ad = 'output/01.preprocess/adata.h5'
adata = sc.read_h5ad(preprocessed_h5ad)
分辨率为1
# The resolution is kept as text so the same value can be embedded in the
# name of the obs column that stores the clustering.
res = '1'
leiden_key = f'leiden_{res}'
sc.tl.leiden(
    adata,
    resolution=float(res),
    key_added=leiden_key,
    random_state=1314,
)
# Cluster ids are drawn directly on the embedding.
sc.pl.umap(adata, color=leiden_key, legend_loc='on data')
主要的marker点图
# Marker genes per major lineage; every entry is a list so all lineages are
# handled uniformly by the plotting calls that consume this mapping.
major = {
    'Fib': ['MME', 'COL1A1', 'PDGFRA', 'COL1A2'],
    'Endo': ['PECAM1', 'RAMP2'],
    'Epi': ['EPCAM'],
    'Immune': ['PTPRC'],
}
sc.pl.dotplot(
    adata,
    var_names=major,
    groupby=f'leiden_{res}',
    dot_max=0.5,
    dot_min=0.1,
)
主要的marker UMAP图
# Collapse the per-lineage marker lists into one gene list and colour the
# UMAP by each of those genes.
marker_genes = flatten(list(major.values()))
sc.pl.umap(adata, color=marker_genes)
注释
# Print an empty template covering every Leiden cluster. label_helper takes
# the highest cluster id, i.e. the number of clusters minus one, so it is
# derived from the data instead of being hard-coded for one clustering run.
label_helper(adata.obs[f'leiden_{res}'].nunique() - 1)
# First-pass annotation: only the non-immune clusters are named here; the
# clusters left blank are filled with 'Immune' after labelling.
new_cluster_names ='''
0,
1,
2,
3,
4,
5,Epi
6,
7,
8,
9,
10,
11,
12,Fib
13,
14,
15,Endo
16,
17,
18,
19,
20,
21,
22,Epi
'''
细胞大群UMAP图
labeled(
    adata,
    cluster_names=new_cluster_names,
    reference_key=f'leiden_{res}',
    cell_type_key='CellTypeS1',
)
# Every cluster without an explicit label is treated as immune. The filled
# column is assigned back explicitly: an in-place fillna on the attribute
# access is chained assignment, which pandas deprecates and which does not
# update the frame under Copy-on-Write.
adata.obs['CellTypeS1'] = adata.obs['CellTypeS1'].fillna('Immune')
sc.pl.umap(adata, color='CellTypeS1')
celltypist注释
下载celltypist的模型
# Fetch only the models used below instead of the whole CellTypist catalogue.
celltypist.models.download_models(
    model=['Immune_All_High.pkl', 'Immune_All_Low.pkl', 'Human_Lung_Atlas.pkl']
)
预测免疫大群
# Predict labels with the 'Immune_All_High.pkl' model and compare them with
# the Leiden clusters in a dot plot drawn on a 5x5 inch axes.
_, ax = plt.subplots(figsize=(5, 5))
predictions = celltypist.annotate(adata, model='Immune_All_High.pkl')
dp = celltypist.dotplot(
    predictions,
    use_as_reference=f'leiden_{res}',
    use_as_prediction='predicted_labels',
    filter_prediction=0.1,
    return_fig=True,
    ax=ax,
)
styled_dp = dp.style(grid=True, cmap='RdBu_r')
styled_dp.show()
预测详细的免疫细胞
# Same comparison against the Leiden clusters, this time with the
# 'Immune_All_Low.pkl' model and a prediction filter of 0.2.
_, ax = plt.subplots(figsize=(5, 5))
predictions = celltypist.annotate(adata, model='Immune_All_Low.pkl')
dp = celltypist.dotplot(
    predictions,
    use_as_reference=f'leiden_{res}',
    use_as_prediction='predicted_labels',
    filter_prediction=0.2,
    return_fig=True,
    ax=ax,
)
styled_dp = dp.style(grid=True, cmap='RdBu_r')
styled_dp.show()
Human_Lung_Atlas预测
# Predict labels with the 'Human_Lung_Atlas.pkl' model and plot them against
# the Leiden clusters, with a prediction filter of 0.2.
_, ax = plt.subplots(figsize=(5, 5))
predictions = celltypist.annotate(adata, model='Human_Lung_Atlas.pkl')
dp = celltypist.dotplot(
    predictions,
    use_as_reference=f'leiden_{res}',
    use_as_prediction='predicted_labels',
    filter_prediction=0.2,
    return_fig=True,
    ax=ax,
)
styled_dp = dp.style(grid=True, cmap='RdBu_r')
styled_dp.show()
看一下Treg细胞的marker基因(TNFRSF9、FOXP3)主要在哪些细胞群中表达
# Treg: colour the UMAP by the two genes inspected for this population.
treg_markers = ['TNFRSF9', 'FOXP3']
sc.pl.umap(adata, color=treg_markers)
注释细胞
# Final annotation table: one "cluster id,cell type" line per Leiden cluster,
# in the header-less CSV form that labeled() parses. Several clusters may
# share the same cell type.
new_cluster_names ='''
0,CD4+ T
1,CD4+ T
2,CD8+ T
3,CD4+ T
4,Mac
5,Epi
6,CD4+ T
7,Treg
8,CD8+ T
9,DC
10,CD8+ T
11,Mast
12,Fib
13,B
14,Treg
15,Endo
16,T
17,DC
18,CD4+ T
19,Plasma
20,DC
21,pDC
22,Epi
'''
细胞分群UMAP图
# Store the detailed labels in obs['CellTypeS2'] and draw them on the UMAP.
cluster_key = f'leiden_{res}'
labeled(adata, new_cluster_names, cluster_key, cell_type_key='CellTypeS2')
sc.pl.umap(adata, color='CellTypeS2', legend_loc='on data')
写出数据
# Save the annotated object into this step's output directory, lzf-compressed.
annotated_h5ad = OUTPUT_DIR + '/adata.h5'
adata.write_h5ad(annotated_h5ad, compression='lzf')
以上注释方式仅供参考
标签:03,True,res,cluster,转录,key,celltypist,adata From: https://www.cnblogs.com/BioQuest/p/17110316.html