在上篇文章中仪搔,我們初步探索了PHATE的使用方法憎账,發(fā)現(xiàn)它在揭示一些連續(xù)分化過程中不同細(xì)胞狀態(tài)之間的微小局部差異具有很好的效果领铐,同時也能保留細(xì)胞全局的整體結(jié)構(gòu)毛雇。在本節(jié)教程中泼菌,我們將復(fù)現(xiàn)演示近期發(fā)表在Science Immunology期刊上的一篇文章的結(jié)果括丁,進(jìn)一步學(xué)習(xí)PHATE的相關(guān)使用方法陨亡。
文章結(jié)果圖:
本文復(fù)現(xiàn)圖:
文章數(shù)據(jù)下載
文章處理后的基因表達(dá)矩陣文件存放在GEO數(shù)據(jù)庫中犀填,檢索號為 GSE182509舵稠,這里只提供了pkl格式的表達(dá)矩陣超升,需要用python的pickle包進(jìn)行讀取,我已將其轉(zhuǎn)換為TSV文件存放在我的百度云盤中哺徊,有需要的可以下載使用室琢。
鏈接:https://pan.baidu.com/s/1IoSIYoEfTzZarLWXWvgvzg
提取碼:gkd9
加載示例數(shù)據(jù)
# 導(dǎo)入所需的python包
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import phate
import scprep
import magic
# 加載示例數(shù)據(jù)
chronic = scprep.io.load_tsv("GSM5530565_Chronic_LCMV_processed.matrix.txt")
acute = scprep.io.load_tsv("GSM5530564_Acute_LCMV_processed.matrix.txt")
tumor8 = scprep.io.load_tsv("GSM5530560_Lung_week8_processed.matrix.txt")
dln8 = scprep.io.load_tsv("GSM5530561_LN_week8_processed.matrix.txt")
tumor17 = scprep.io.load_tsv("GSM5530562_Lung_week17_processed.matrix.txt")
dln17 = scprep.io.load_tsv("GSM5530563_LN_week17_processed.matrix.txt")
# 查看示例數(shù)據(jù)
chronic.head()
# Mrpl15 (ENSMUSG00000033845) ... CAAA01147332.1 (ENSMUSG00000095742)
#0 ...
#AAACCTGAGAGTAAGG-1 0.0 ... 0.000000
#AAACCTGGTGTGAAAT-1 0.0 ... 0.000000
#AAACGGGAGGCTCATT-1 0.0 ... 0.000000
#AAACGGGGTAGCTTGT-1 0.0 ... 0.000000
#AAACGGGGTCGAATCT-1 0.0 ... 0.552073
#[5 rows x 11595 columns]
chronic.shape
#(1185, 11595)
acute.shape
#(10768, 12960)
tumor8.shape
#(806, 11749)
dln8.shape
#(1742, 12116)
tumor17.shape
#(731, 11150)
dln17.shape
#(876, 11595)
數(shù)據(jù)質(zhì)控預(yù)處理和數(shù)據(jù)歸一化
由于下載的數(shù)據(jù)是預(yù)先已經(jīng)進(jìn)行質(zhì)控過濾和歸一化處理的,這里將不再進(jìn)行處理落追。詳細(xì)用法見上期 使用PHATE進(jìn)行單細(xì)胞高維數(shù)據(jù)的可視化
使用PHATE進(jìn)行低維嵌入降維可視化
### analysis for chronic sample ###
#Embedding Data Using PHATE
# Instantiating the PHATE estimator
phate_operator = phate.PHATE(n_jobs=-2)
Y_phate = phate_operator.fit_transform(chronic)
Y_phate
#array([[ -5.34563999, 6.11670333],
# [ 29.8771147 , 8.33029219],
# [ 9.44218856, -25.94568946],
# ...,
# [-10.86402299, -1.15554 ],
# [-14.65615727, -1.03794057],
# [-16.53150258, 4.50993521]])
細(xì)胞聚類分群
# cell clustering
clusters = phate.cluster.kmeans(phate_operator, n_clusters=8)
clusters
#array([0, 2, 1, ..., 4, 4, 4], dtype=int32)
# 聚類結(jié)果可視化
scprep.plot.scatter2d(Y_phate, c=clusters, figsize=(8,7), cmap="Spectral",
ticks=False, label_prefix="PHATE", title= "Chronic LCMV")
plt.savefig("plot_phate_chronic_2d_by_cluster.png")
使用MAGIC進(jìn)行數(shù)據(jù)填充
#Creating the MAGIC operator
magic_op = magic.MAGIC()
# Running MAGIC for all genes
chronic_magic = magic_op.fit_transform(chronic, genes='all_genes')
chronic_magic.head()
# Mrpl15 (ENSMUSG00000033845) ... CAAA01147332.1 (ENSMUSG00000095742)
#0 ...
#AAACCTGAGAGTAAGG-1 0.150924 ... 0.076723
#AAACCTGGTGTGAAAT-1 0.153466 ... 0.067584
#AAACGGGAGGCTCATT-1 0.146601 ... 0.064838
# rename columns names
chronic_magic.columns = [i.split(" ")[0] for i in chronic_magic.columns.tolist()]
# marker基因可視化
markers = ["Sell", "Ccr7", "Tcf7", "Slamf6", "Xcl1", "Il7r", "Eomes", "Tbx21", "Gzmb", "Prf1", "Pdcd1", "Havcr2", "Cd101", "Cx3cr1", "Cxcr6"]
for marker in markers:
# 2d plot
scprep.plot.scatter2d(Y_phate, c=chronic_magic[marker], figsize=(8,7), cmap="Reds",
ticks=False, label_prefix="PHATE", title=marker + " magic expression")
plt.savefig("plot_chronic_magic_marker_2d_" + marker + ".png")
根據(jù)這些marker基因的表達(dá)情況盈滴,我們將不同的cluster進(jìn)行細(xì)胞類型的注釋,得到以下的細(xì)胞注釋結(jié)果轿钠。
Tnaive:Sell, Ccr7
Tsl: Tcf7, Slamf6, Xcl1
Ttrans: Cx3cr1, Cxcr6
Tex: Pdcd1, Havcr2, Cd101
多樣本合并分析
### analysis for combined five sample ###
# Merge all datasets and create a vector representing the time point of each sample
alldata = [chronic,tumor8,tumor17,dln8,dln17]
EBT_counts, sample_labels = scprep.utils.combine_batches(
alldata,
["Chronic","Early Tumor","Late Tumor","Early LN","Late LN"],
append_to_cell_names=True
)
del alldata # removes objects from memory
EBT_counts.head()
# Mrpl15 (ENSMUSG00000033845) ... CAAA01147332.1 (ENSMUSG00000095742)
#AAACCTGAGAGTAAGG-1_Chronic 0.0 ... 0.000000
#AAACCTGGTGTGAAAT-1_Chronic 0.0 ... 0.000000
#AAACGGGAGGCTCATT-1_Chronic 0.0 ... 0.000000
#AAACGGGGTAGCTTGT-1_Chronic 0.0 ... 0.000000
#AAACGGGGTCGAATCT-1_Chronic 0.0 ... 0.552073
#[5 rows x 10246 columns]
EBT_counts.shape
#(5340, 10246)
sample_labels
#AAACCTGAGAGTAAGG-1_Chronic Chronic
#AAACCTGGTGTGAAAT-1_Chronic Chronic
#AAACGGGAGGCTCATT-1_Chronic Chronic
#AAACGGGGTAGCTTGT-1_Chronic Chronic
#AAACGGGGTCGAATCT-1_Chronic Chronic
# ...
#Name: sample_labels, Length: 5340, dtype: object
PHATE降維可視化
#Embedding Data Using PHATE
# Instantiating the PHATE estimator
phate_operator = phate.PHATE(n_jobs=-2)
Y_phate = phate_operator.fit_transform(EBT_counts)
Y_phate
#array([[ 79.05651647, 15.42592929],
# [ 20.72815444, 25.65566379],
# [ 90.57712893, -4.77917562],
# ...,
# [-52.39011592, 39.20142516],
# [-28.51731009, -8.66499775],
# [-46.00734805, -17.37265621]])
scprep.plot.scatter2d(Y_phate, c=sample_labels, figsize=(10,8), cmap="Spectral",
ticks=False, label_prefix="PHATE")
plt.savefig("plot_phate_2d_by_sample.png")
# 3D visualization
phate_operator.set_params(n_components=3)
Y_phate_3d = phate_operator.transform()
Y_phate_3d
#array([[ 77.85894712, 13.21732854, -11.29708591],
# [ 17.23971363, 19.47621167, -27.54015408],
# [ 87.69788489, 4.0231593 , 13.16082805],
# ...,
# [-46.46850791, 42.68065699, -2.93836416],
# [-20.53357918, -4.9821892 , -22.70265013],
# [-36.18351133, -9.56494547, -29.67206442]])
scprep.plot.scatter3d(Y_phate_3d, c=sample_labels, figsize=(8,6), cmap="Spectral",
ticks=False, label_prefix="PHATE")
plt.savefig("plot_phate_3d_by_sample.png")
細(xì)胞聚類分群
# cell clustering
clusters = phate.cluster.kmeans(phate_operator, n_clusters=12)
clusters
#array([8, 6, 1, ..., 2, 4, 4], dtype=int32)
# save meta data
meta = pd.merge(pd.DataFrame(sample_labels),pd.DataFrame(clusters,index=sample_labels.index,columns=["cluster"]),left_index=True,right_index=True)
meta.head()
# sample_labels cluster
#AAACCTGAGAGTAAGG-1_Chronic Chronic 8
#AAACCTGGTGTGAAAT-1_Chronic Chronic 6
#AAACGGGAGGCTCATT-1_Chronic Chronic 1
#AAACGGGGTAGCTTGT-1_Chronic Chronic 8
#AAACGGGGTCGAATCT-1_Chronic Chronic 8
meta.to_csv("metadata.csv")
scprep.plot.scatter3d(Y_phate_3d, c=clusters, figsize=(8,6), cmap="Spectral",
ticks=False, label_prefix="PHATE")
plt.savefig("plot_phate_3d_by_cluster.png")
scprep.plot.scatter2d(Y_phate, c=clusters, figsize=(8,6), cmap="Spectral",
ticks=False, label_prefix="PHATE")
plt.savefig("plot_phate_2d_by_cluster.png")
# to save as a gif:
scprep.plot.rotate_scatter3d(Y_phate_3d, c=sample_labels,
figsize=(8,6), cmap="Spectral",
ticks=False, label_prefix="PHATE", filename="phate.gif")
使用MAGIC進(jìn)行數(shù)據(jù)填充
# rename columns names
EBT_counts.columns = [i.split(" ")[0] for i in EBT_counts.columns.tolist()]
# MAGIC imputation
#Creating the MAGIC operator
magic_op = magic.MAGIC()
#Running MAGIC with gene selection
#bmmsc_magic = magic_op.fit_transform(bmmsc_data, genes=["Mpo", "Klf1", "Ifitm1"])
#bmmsc_magic.head()
#Visualizing gene-gene relationships
#fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))
#scprep.plot.scatter(x=bmmsc_data['Mpo'], y=bmmsc_data['Klf1'], c=bmmsc_data['Ifitm1'], ax=ax1,
# xlabel='Mpo', ylabel='Klf1', legend_title="Ifitm1", title='Before MAGIC')
#scprep.plot.scatter(x=bmmsc_magic['Mpo'], y=bmmsc_magic['Klf1'], c=bmmsc_magic['Ifitm1'], ax=ax2,
# xlabel='Mpo', ylabel='Klf1', legend_title="Ifitm1", title='After MAGIC')
#plt.tight_layout()
#plt.show()
# Running MAGIC for all genes
EBT_counts_magic = magic_op.fit_transform(EBT_counts, genes='all_genes')
EBT_counts_magic.head()
markers = ["Sell", "Ccr7", "Tcf7", "Slamf6", "Xcl1", "Il7r", "Eomes", "Tbx21", "Gzmb", "Prf1", "Pdcd1", "Havcr2", "Cd101", "Cx3cr1", "Cxcr6"]
for marker in markers:
# 2d plot
scprep.plot.scatter2d(Y_phate, c=EBT_counts_magic[marker], figsize=(8,6), cmap="Reds",
ticks=False, label_prefix="PHATE", title=marker + " magic expression")
plt.savefig("plot_magic_marker_2d_" + marker + ".png")
# 3d plot
scprep.plot.scatter3d(Y_phate_3d, c=EBT_counts_magic[marker], figsize=(8,6), cmap="Reds",
ticks=False, label_prefix="PHATE", title=marker + " magic expression")
plt.savefig("plot_magic_marker_3d_" + marker + ".png")
基因差異表達(dá)分析
# Differential analysis
# By samples
de_samples = scprep.stats.differential_expression_by_cluster(EBT_counts_magic,clusters=sample_labels,direction="up")
de_samples
de_samples["Chronic"]
for key,value in de_samples.items():
print(value.head(n=10))
# save DE data
de_samples["Chronic"].to_csv("DEs_Chronic.csv")
de_samples["Early Tumor"].to_csv("DEs_Early_Tumor.csv")
de_samples["Early LN"].to_csv("DEs_Early_LN.csv")
# By clusters
de_clusters = scprep.stats.differential_expression_by_cluster(EBT_counts_magic,clusters=clusters,direction="up")
de_clusters
de_clusters[0]
for key,value in de_clusters.items():
print(value.head(n=10))