python 將多個細胞的表達量合并成一個假細胞的表達量。
import anndata
import matplotlib.pyplot as plt
import numpy as np
import scanpy as sc
import anndata as ad
import pandas as pd
import math
import re
def _pseudocell_labels(values, insize):
    """Return one ``<value>-pseudocell_<k>`` label per element of *values*."""
    # Work positionally on the string form so categorical order, NaN and
    # duplicate index labels cannot desynchronise counts from rows.
    keys = pd.Series(values).astype('str').reset_index(drop=True)
    rank = keys.groupby(keys, sort=False, dropna=False).cumcount().to_numpy()
    # The rank is 0-based, so cells 0..insize-1 of a group land in chunk 1.
    chunk = np.ceil((rank + 1) / insize).astype(int)
    return [f"{key}-pseudocell_{idx}" for key, idx in zip(keys, chunk)]


def condense_cell(inadata=None,ingroup=None,insize=100,label='adata'):
    """Average the cells of each group into pseudocells of ``insize`` cells.

    Within every distinct value of ``inadata.obs[ingroup]`` the cells are
    taken in their current ``obs`` order and split into consecutive chunks
    of ``insize`` cells (the last chunk of a group may be smaller).  Each
    chunk becomes one pseudocell whose expression is the per-gene mean of
    its member cells.

    Side effects:
        * ``inadata.obs['group']`` is set to the pseudocell label of every
          cell, ``'<ingroup value>-pseudocell_<k>'``.  If ``ingroup`` is
          itself ``'group'`` the original grouping column is overwritten.
        * ``<label>_<ingroup>_<insize>cells_condese_metadata.xls`` is written:
          the tab-separated ``obs`` table, sorted by ``ingroup``, including
          the ``group`` column.
        * ``<label>_<ingroup>_<insize>cells_condese.h5ad`` is written: the
          pseudocell AnnData.

    Args:
        inadata: AnnData holding the single-cell expression matrix.
        ingroup: Name of the ``inadata.obs`` column used to group cells.
        insize: Maximum number of cells per pseudocell; must be positive.
        label: Prefix for the two output file names.

    Returns:
        The pseudocell AnnData.  Its ``obs['type']`` holds the pseudocell
        name with the ``-pseudocell_<k>`` suffix removed; ``var`` carries
        the ``mt`` / ``ribo`` / ``hb`` flags, and the outputs of
        ``sc.pp.calculate_qc_metrics`` are stored in place.

    Raises:
        ValueError: If ``inadata`` or ``ingroup`` is None, or ``insize`` is
            not positive.
        KeyError: If ``ingroup`` is not a column of ``inadata.obs``.
    """
    if inadata is None or ingroup is None:
        raise ValueError("condense_cell requires both 'inadata' and 'ingroup'")
    if ingroup not in inadata.obs.columns:
        raise KeyError(f"{ingroup!r} is not a column of inadata.obs")
    if insize <= 0:
        raise ValueError(f"insize must be positive, got {insize!r}")

    obs = inadata.obs
    labels = np.array(_pseudocell_labels(obs[ingroup], insize), dtype=object)

    # Row positions sorted by group; a stable sort keeps the original order
    # inside each group, so the file rows match the chunk numbering.
    sorted_pos = (
        obs[[ingroup]]
        .reset_index(drop=True)
        .sort_values(by=ingroup, kind='stable')
        .index.to_numpy()
    )
    # Sort before adding 'group' so ingroup == 'group' still sorts by the
    # original grouping values rather than by the new labels.
    meta = obs.iloc[sorted_pos].copy()
    meta['group'] = labels[sorted_pos]

    out_prefix = label + '_' + ingroup + '_' + str(insize) + 'cells_condese'
    meta.to_csv(out_prefix + '_metadata.xls', sep='\t')

    # Positional assignment: no index realignment is needed.
    inadata.obs['group'] = labels

    # NOTE: to_df() densifies the matrix; memory scales with cells x genes.
    grouped = inadata.to_df().groupby(labels).mean()

    new_adata = ad.AnnData(
        X=grouped.to_numpy(),
        obs=pd.DataFrame(index=list(grouped.index)),
        # Copy so the columns added below cannot leak into inadata.var.
        var=inadata.var.copy(),
    )
    new_adata.obs['type'] = [
        re.sub(r'-pseudocell_.*', '', name) for name in new_adata.obs_names
    ]

    # mitochondrial genes, "MT-" for human, "Mt-" for mouse
    new_adata.var["mt"] = new_adata.var_names.str.startswith("MT-")
    # ribosomal genes
    new_adata.var["ribo"] = new_adata.var_names.str.startswith(("RPS", "RPL"))
    # hemoglobin genes
    new_adata.var["hb"] = new_adata.var_names.str.contains(r"^HB[^(P)]")
    sc.pp.calculate_qc_metrics(
        new_adata, qc_vars=["mt", "ribo", "hb"], inplace=True, log1p=False
    )

    new_adata.write(out_prefix + '.h5ad')
    return new_adata
# Example call: group cells by obs column 'group' with insize=10; output file
# names start with 'sc_group_10cells_condese'.
# NOTE(review): `inadata` is not defined in the visible code -- it must be an
# AnnData object created beforehand. Because ingroup is 'group', the label
# column that condense_cell stores in obs['group'] replaces the grouping column.
condense_cell(inadata=inadata,ingroup='group',insize=10,label='sc')