復(fù)現(xiàn)文章:Analyses of metastasis-associated genes in IDH wild-type glioma
數(shù)據(jù)來源:GSE84465
圖1
數(shù)據(jù)篩選條件
除了min.cells min.features percent.mt,還有Pearson相關(guān)系數(shù)的指標(biāo)(Fig S1)
根據(jù)Pearson相關(guān)系數(shù)大于0.4這個指標(biāo),去除了Tumor_BT_S1和Tumor_BT_S4兩個樣本沟蔑,只針對剩余的6個樣本分析
Fig S1
第一步:下載GSE84465_GBM_All_data.csv.gz和Metadata的SraRunTable.txt文件
第二步代碼
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
a<-read.table("GSE84465_GBM_All_data.csv.gz")
head(rownames(a))
tail(rownames(a),10)
a=a[1:(nrow(a)-5),] #最后5行行名異常,剔除
b<-read.table("SraRunTable.txt",sep = ",",header = T) #樣本信息
table(b$PATIENT_ID)
table(b$tissue)
table(b$tissue, b$PATIENT_ID)
new.b <- b[,c("Plate.ID","well","tissue","PATIENT_ID")]
new.b$sample <- paste0("X",b$Plate.ID,".",b$well)
head(new.b)
identical(colnames(a),new.b$sample)
pbmc <- CreateSeuratObject(counts = a)
head(pbmc@meta.data)
patient<-new.b$PATIENT_ID
pbmc <- AddMetaData(object = pbmc, metadata = patient, col.name = 'PATIENT_ID')
tissue<-new.b$tissue
pbmc <- AddMetaData(object = pbmc, metadata = tissue, col.name = 'Tissue')
sample<-paste0(tissue,"_",patient)
pbmc <- AddMetaData(object = pbmc, metadata = sample, col.name = 'Sample')
head(pbmc@meta.data)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
table(pbmc$PATIENT_ID,pbmc$Tissue)
###########################
#計算Pearson相關(guān)系數(shù)
pbmc.filt <- subset(pbmc, subset = PATIENT_ID=="BT_S6"&Tissue=="Periphery") #BT_S1 2 4 6 Tumor Periphery
dim(pbmc.filt)
plot1 <- FeatureScatter(pbmc.filt, feature1 = "nCount_RNA", feature2 = "percent.mt",group.by = "PATIENT_ID",pt.size=1.5)
plot2 <- FeatureScatter(pbmc.filt, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",group.by = "PATIENT_ID",pt.size=1.5)
plot1 + plot2
############################
selected_f <- rownames(pbmc)[Matrix::rowSums(pbmc@assays$RNA@counts > 0 ) > 3]
pbmc.filt <- subset(pbmc, subset = nFeature_RNA > 50 & nFeature_RNA < 6000 & percent.mt < 0.05 &
Sample%in%c("Periphery_BT_S1","Periphery_BT_S2","Periphery_BT_S4","Periphery_BT_S6","Tumor_BT_S2","Tumor_BT_S6"),
features = selected_f)
dim(pbmc.filt)
VlnPlot(object = pbmc.filt, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),group.by = "Sample")
#標(biāo)準(zhǔn)化
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
### 鑒定高變異基因
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 1500) #變異最大的1500個基因
#輸出特征方差圖
top10 <- head(VariableFeatures(pbmc), 10)
plot1 <- VariableFeaturePlot(pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10,repel = TRUE)
plot1+plot2
圖2
#標(biāo)準(zhǔn)化
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
### 鑒定高變異基因
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 500) #變異最大的1500個基因
#輸出特征方差圖
top10 <- head(VariableFeatures(pbmc), 10)
plot1 <- VariableFeaturePlot(pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10,repel = TRUE)
plot1+plot2
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
pbmc <- ScaleData(pbmc, vars.to.regress = "percent.mt")
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(object = pbmc, dims = 1:2, reduction = "pca",nfeatures = 20) #4個PC 20個基因
DimPlot(pbmc, reduction = "pca",group.by="Sample")
DimHeatmap(pbmc, dims = 1:2, cells = 500, balanced = TRUE,nfeatures = 30,ncol=2)
pbmc <- JackStraw(object = pbmc, num.replicate = 100)
pbmc <- ScoreJackStraw(object = pbmc, dims = 1:20)
JackStrawPlot(object = pbmc, dims = 1:20,reduction = "pca")
ElbowPlot(pbmc,reduction="pca") #根據(jù)ElbowPlot確定PC數(shù)量
##TSNE聚類分析
pbmc <- FindNeighbors(pbmc, dims = 1:20)
pbmc <- FindClusters(pbmc, resolution = 0.5)
#tSNE降維
pbmc <- RunTSNE(object = pbmc, dims = 1:20)
DimPlot(pbmc, reduction = "tsne",label = TRUE,pt.size = 1)
table(pbmc@meta.data$Tissue,pbmc@meta.data$seurat_clusters)
### 差異表達(dá)分析
# 找到每一個cluster當(dāng)中的marker聘芜,并且只展示陽性的marker狼讨。
pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
head(pbmc.markers, n = 10)
write.table(pbmc.markers,file = 'pbmc.markers.txt',sep = '\t',row.names=F,quote=F)
library("tidyverse")
sig.markers<- pbmc.markers %>% select(gene,everything()) %>%
subset(p_val_adj<0.05 & abs(pbmc.markers$avg_log2FC)>1)
dim(sig.markers)
write.table(sig.markers,file="sigmarkers.xls",sep="\t",row.names=F,quote=F)
top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
#差異基因可視化
VlnPlot(object = pbmc, features = c("EGFR", "CCL3","AGXT2L1","DCN","GPR17","MOG","SYT1"),group.by = "seurat_clusters",pt.size = 1)
Cluster 2 3 6被定義為metastatic tumor cell,比較metastatic和non-mentastatic之間的基因表達(dá)差異
metastatic.markers <- FindMarkers(pbmc, ident.1 = c(2,3,6), ident.2 = c(0,1,4,5,7,8,9,10,11,12), min.pct = 0.25)
head(metastatic.markers,10)
sig.metastatic.markers<- metastatic.markers %>% select(colnames(metastatic.markers),everything()) %>%
subset(p_val_adj<0.05 & abs(metastatic.markers$avg_log2FC)>1)
dim(sig.metastatic.markers)
write.table(sig.metastatic.markers,file="sigmetastaticmarkers.xls",sep="\t",row.names=F,quote=F)