点击查看代码
import argparse
import scanpy as sc
import hotspot
import numpy as np
import mplscience
import matplotlib
import matplotlib.pyplot as plt
import pdb
def ParserInput():
parser = argparse.ArgumentParser(description='hotspot.space')
parser.add_argument("--h5ad","-a",required=True, dest='h5ad',help='h5ad path',type=str)
parser.add_argument("--outdir","-o",required=True, dest='outdir',help='result directory',type=str)
parser.add_argument("--species","-s",required=False, dest='species',help='species,hsa or mmu',type=str,default="hsa")
parser.add_argument("--prefix","-p",required=False, dest='prefix',help='result file prefix',type=str,default="sample")
parser.add_argument("--threads","-t",required=False, dest='threads',help='threads num',type=int,default=4)
parser.add_argument("--mingenes","-g",required=False, dest='mingenes',help='min genes per spot',type=int,default=50)
parser.add_argument("--mincells","-c",required=False, dest='mincells',help='min cells per spot',type=int,default=1)
args = parser.parse_args()
return args
#filter spot and renormalize the data for expression viz on plots
def PreProcess(adata,min_genes,min_cells):
sc.pp.filter_cells(adata, min_genes=min_genes)
sc.pp.filter_genes(adata, min_cells=min_cells)
adata.obs["total_counts"] = np.asarray(adata.X.sum(1)).ravel()
adata.layers["csc_counts"] = adata.X.tocsc()
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
return adata
# Create the Hotspot object and the neighborhood graph
def RunHotspot(adata):
hs = hotspot.Hotspot(
adata,
layer_key="csc_counts",
model='danb',
latent_obsm_key="spatial",
umi_counts_obs_key="total_counts",
)
hs.create_knn_graph(weighted_graph=True, n_neighbors=30,)
return hs
'''
@compute autocorrelations for each gene
@Grouping genes into spatial modules
@Compute pair-wise local correlations between these genes
'''
def ModuleDef(hs,jobs):
hs_results = hs.compute_autocorrelations(jobs=jobs)
hs_genes = hs_results.head(6000).index
lcz = hs.compute_local_correlations(hs_genes, jobs=jobs)
modules = hs.create_modules(min_gene_threshold=20, core_only=False, fdr_threshold=0.05)
return modules
def main():
args = ParserInput()
outdir = args.outdir
prefix = args.prefix
jobs = args.threads
min_genes= args.mingenes
min_cells = args.mincells
adata = sc.read(args.h5ad)
adata = PreProcess(adata,min_genes,min_cells)
hs = RunHotspot(adata)
modules = ModuleDef(hs,jobs)
modules.value_counts()
fig = plt.figure()
hs.plot_local_correlations()
plt.savefig(outdir+"/"+prefix+'.module.pdf')
results = hs.results.join(hs.modules)
# Show the top genes for a module
for module in results.Module.unique().tolist():
if(not np.isnan(module)):
result = results.loc[results.Module == module]
result.sort_values('Z', ascending=False).head(10)
result.to_csv(outdir+"/"+prefix+".module."+str(module)+".csv")
# Plot top gene expression in spatial position
cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
'grays', ['#DDDDDD', '#000000'])
genes = result.sort_values('Z', ascending=False).head(6).index
fig = plt.figure()
with mplscience.style_context():
sc.pl.spatial(
adata,
color=genes,
cmap=cmap,
frameon=False,
vmin='p0',
vmax='p95',
spot_size=30,
)
plt.savefig(outdir+"/"+prefix+'.module.'+str(module)+'.topgene.pdf')
if __name__ == '__main__':
main()