用慣了Seurat計算DEGs,使用Scanpy進行計算得到的格式會很不適應,可讀性有點差,因此可以使用以下函數進行格式轉換,並且根據FC進行了排序篩選Top基因。
#calculate DEG
def _rank_genes_to_long_table(result, n_top=2000):
    """Reshape a ``rank_genes_groups`` result into one long DataFrame.

    Args:
        result: The mapping stored under ``adata.uns['rank_genes_groups']``.
            It is indexed by ``'names'``, ``'scores'``, ``'logfoldchanges'``
            and ``'pvals_adj'``; each of those is indexed by group name, and
            the group names are read from ``result['names'].dtype.names``.
        n_top: Number of leading rows to keep per group, in the order they
            are stored in ``result``. ``None`` keeps every row.

    Returns:
        A DataFrame with columns ``gene``, ``score``, ``log2fc``,
        ``p_value``, ``cluster`` and ``fc`` (``2 ** log2fc``), sorted by
        ``cluster`` and then ``log2fc``, both descending, with missing
        values placed first.
    """
    columns = ['gene', 'score', 'log2fc', 'p_value', 'cluster']
    # One frame per group.  The group name is used as the cluster label
    # directly instead of being recovered from a synthetic column name, so
    # names that happen to contain "_n" (e.g. "T_naive") are left intact.
    frames = [
        pd.DataFrame({
            'gene': result['names'][group][:n_top],
            'score': result['scores'][group][:n_top],
            'log2fc': result['logfoldchanges'][group][:n_top],
            'p_value': result['pvals_adj'][group][:n_top],
            'cluster': str(group),
        })
        for group in result['names'].dtype.names
    ]
    if frames:
        # ignore_index gives a running 0..N-1 index in group order.
        out = pd.concat(frames, ignore_index=True)
    else:
        out = pd.DataFrame(columns=columns)

    # Store the statistics as float64 so the exported text keeps full
    # precision regardless of the dtype the result arrays were stored in.
    numeric = ['score', 'log2fc', 'p_value']
    out[numeric] = out[numeric].astype('float64')

    # Vectorised power: NaN stays NaN and huge values become inf instead of
    # raising OverflowError.
    out['fc'] = 2.0 ** out['log2fc']
    return out.sort_values(
        by=['cluster', 'log2fc'], na_position='first', ascending=False
    )


def calculate_deg(adata, incol='leiden', use_raw=False, n_top=2000):
    """Compute per-group marker genes and export them as one long table.

    Calls ``sc.tl.rank_genes_groups`` on ``adata`` (which stores its result
    in ``adata.uns['rank_genes_groups']``), reshapes that result into one
    row per (group, gene) pair, and writes the table to
    ``<incol>_out_deg.xls`` in the current working directory.  Despite the
    ``.xls`` suffix the file is plain tab-separated text.

    Args:
        adata: Annotated data object passed to ``sc.tl.rank_genes_groups``.
        incol: Name of the grouping column handed to ``groupby``; it is also
            used as the prefix of the output file name.
        use_raw: Forwarded unchanged to ``sc.tl.rank_genes_groups``.
        n_top: Number of leading genes kept per group (default 2000, the
            previously hard-coded value).  ``None`` keeps all genes.

    Returns:
        The exported DataFrame with columns ``gene``, ``score``, ``log2fc``,
        ``p_value``, ``cluster`` and ``fc``.
    """
    sc.tl.rank_genes_groups(adata, groupby=incol, use_raw=use_raw)
    out = _rank_genes_to_long_table(adata.uns['rank_genes_groups'], n_top)
    out.to_csv(incol + '_out_deg.xls', sep='\t', header=True, index=True)
    return out