作者律秃,Evil Genius
針對(duì)visium爬橡、bin模式的Stereo-seq、HD等的腳本更新棒动。
對(duì)于單細(xì)胞而言糙申,大家網(wǎng)上找點(diǎn)代碼分析出個(gè)結(jié)果,已經(jīng)很容易了船惨,但是想分析的很好柜裸,還是很難的。
咱國(guó)內(nèi)的盜版能力粱锐,全世界都出了名了疙挺。
大家拿到盜版,千萬自己要學(xué)習(xí)一下卜范,看看有沒有錯(cuò)誤的地方衔统,我當(dāng)然不鼓勵(lì)盜版,但是也沒辦法海雪,尤其課上的內(nèi)容锦爵,一定要忠于數(shù)據(jù),忠于實(shí)際情況奥裸,不要瞎用险掀,生物信息,生物在前湾宙,背景內(nèi)容一定要了解清楚再去做樟氢。
我自己看了看,盜版很多內(nèi)容都是閹割后的侠鳄,大家自己要有識(shí)別能力埠啃,改過來,還有參數(shù)的配置伟恶,我上課是示例碴开,配的參數(shù)簡(jiǎn)單,別瞎套用,自己的項(xiàng)目要調(diào)整的潦牛。
大家以后工作了眶掌,一定要忠于數(shù)據(jù),忠于事實(shí)巴碗,生物信息往往都跟病人朴爬、腫瘤、發(fā)育等等相關(guān)橡淆,千萬不要有任何的僥幸心理召噩,小小的差錯(cuò)很容易造成人間悲劇。
今天我們更新腳本逸爵,隨著方法的更新和認(rèn)識(shí)的提升蚣常,腳本也會(huì)隨時(shí)變動(dòng)。
尤其借鑒一些高分文獻(xiàn)痊银,加深了認(rèn)識(shí)之后,腳本也要隨之更新施绎。
蘭衛(wèi)正在招賢納士溯革,一家好的公司,尤其上市公司谷醉,值得大家關(guān)注致稀。
首先是空間版本的inferCNV,空間區(qū)域注釋大家自己做俱尼。
#! user/bin/R
###zhaoyunfei
###Run inferCNV on one ST sample
###2024/11/19
### Parameters ------------------------------------------------------
option_list = list(
make_option(c("-s", "--sample_id"),
type="character",
default=NULL,
help="Sample name for ST",
metavar="character"),
make_option(c("-m", "--infercnv_mode"),
type="character",
default="subcluster",
help="InferCNV analysis_mode sample|subcluster|cell",
metavar="character"),
make_option(c("-c", "--label_column"),
type="character",
default="tumor_normal",
help="InferCNV cell_type_file column name",
metavar="character"),
make_option(c("-o", "--outdir"),
type="character",
default="./",
help="the outdir",
metavar="character"),
make_option(c("-w", "--window_length"),
type="integer",
default="151",
help="InferCNV smoothing window length",
metavar="integer"),
make_option(c("-d", "--sim_method"),
type="character",
default="meanvar",
help="InferCNV hspike simulation method",
metavar="character"),
make_option(c("-n", "--num_normal"),
type="integer",
default="-1",
help="Number normal spots to give input to inferCNV",
metavar="integer"),
make_option(c("-p", "--pnorm_bayes"),
type="numeric",
default="0.5",
help="Bayes posterior probability cutoff for inferCNV",
metavar="numeric"),
make_option(c("-r", "--leiden_resolution"),
type="numeric",
default="0.05",
help="Leiden resolution for clustering",
metavar="numeric"),
make_option("--output_prefix",
type="character",
default="",
help="Output prefix",
metavar="character")
)
opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser)
sample_id=opt$sample_id
infercnv_mode=opt$infercnv_mode
label_column=opt$label_column
window_length=opt$window_length
sim_method=opt$sim_method
num_normal=opt$num_normal
pnorm_bayes=opt$pnorm_bayes
output_prefix=opt$output_prefix
library(infercnv)
library(Seurat)
library(reticulate)
library(googlesheets4)
library(tidyverse)
library(optparse)
# use_condaenv("inferCNV", required=TRUE)
reticulate::py_config()
公司一般參數(shù)是外置的抖单,就是寫Shell腳本配參數(shù)運(yùn)行主腳本,對(duì)于沒有l(wèi)inux系統(tǒng)的情況遇八,參數(shù)可以內(nèi)置矛绘,比如下面寫進(jìn)主腳本。
sample_id = "PT100000000001"
label_column = "tumor_regions"
infercnv_mode = "sample"
output_prefix = ""
window_length = 151
sim_method = "meanvar"
num_normal = 200
pnorm_bayes = 0.3
主腳本
short_label_column = case_when(label_column=="tumor_regions"~"region",
label_column=="tumor_normal"~"TN",
TRUE~NA_character_)
normal_label = case_when(label_column=="tumor_regions"~"Ref",
label_column=="tumor_normal"~"Normal",
TRUE~NA_character_)
if (!file.exists(outdir)) {dir.create(outdir,recursive = TRUE)}
if (infercnv_mode == "sample") {
output_dir = str_interp("${outdir}/${output_prefix}_${window_length}_${sim_method}_${num_normal}")
} else if (infercnv_mode == "subcluster") {
output_dir = str_interp("${outdir}/${output_prefix}subcluster_${window_length}_${sim_method}_${num_normal}")
} else if (infercnv_mode == "cell") {
output_dir = str_interp("${outdir}/${output_prefix}cell_${window_length}_${sim_method}_${num_normal}")
}
sample_outdir = str_interp("${output_dir}/${sample_id}/")
if (!dir.exists(sample_outdir)) {dir.create(sample_outdir)}
# Don't rerun sample already done
final_mat_filename = str_interp("${sample_outdir}/infercnv.observations.txt")
stopifnot(!file.exists(final_mat_filename))
print(paste0("[inferCNV] Start running for sample: ", sample_id))
# Read metadata from annotation sheet
obj <- read_st_obj(sample_id)
st_meta = read_subclone_tbl(sample_id)
obj = AddMetaData(obj, st_meta[Cells(obj),])
obj$tumor_normal = ifelse(obj$Filtered_tumor_regions==0, "Normal", "Tumor")
if (label_column == "Filtered_tumor_regions") {
subclone_color = create_subclone_color(obj$Filtered_tumor_regions)
subclone_color["0"] = "grey75"
subclone_color["Ref"] = NA
} else if (label_column == "tumor_normal") {
subclone_color = c("Tumor"= as.character(bound.color["Tumor"]), "Normal"=as.character(bound.color["TME"]))
}
Idents(obj) <- label_column
table(Idents(obj))
counts_matrix = GetAssayData(object = obj, slot = "counts")
####gene order file
gene_order = read.delim(file = "gencode_v32_gene_name.txt", header=FALSE, stringsAsFactors = FALSE, sep="\t")
cell_type_file = obj@meta.data %>% select(!!label_column)
gene_order_file = gene_order %>% column_to_rownames("V1")
Idents(obj) = label_column
##### select normal spots with low estimated purity #####
if (num_normal > 0) {
threshold <- sort(obj$Purity[obj$Filtered_tumor_regions==0])[num_normal]
if (is.na(threshold)) {
threshold = sort(obj$Purity)[num_normal]
obj$Filtered_tumor_regions = ifelse(obj$Purity<=threshold, "Ref", obj$Filtered_tumor_regions)
}
obj$Filtered_tumor_regions = case_when(
obj$Purity <= threshold & obj$Filtered_tumor_regions==0 ~ "Ref",
TRUE ~ obj$Filtered_tumor_regions
)
cell_type_file_filtered = obj@meta.data %>% select(!!label_column)
infercnv_obj = CreateInfercnvObject(raw_counts_matrix=as.matrix(counts_matrix),
annotations_file=cell_type_file_filtered,
gene_order_file=gene_order_file,
ref_group_names=as.vector(normal_label))
} else {
infercnv_obj = CreateInfercnvObject(raw_counts_matrix=as.matrix(counts_matrix),
annotations_file=cell_type_file,
gene_order_file=gene_order_file,
ref_group_names=as.vector(normal_label))
}
table(obj@meta.data[,label_column])
if (!dir.exists(str_interp("${output_dir}/infercnv_label"))) {dir.create(str_interp("${output_dir}/infercnv_label"))}
p_HE = SpatialPlot(obj, group.by=label_column, alpha=0, crop=F) +
NoLegend() + labs(title=sample_id)
p = SpatialPlot(obj, group.by=label_column, stroke=0, label=T, label.size=2, label.box=F, crop=F, alpha=0.8) +
scale_fill_manual(values=subclone_color, na.value=NA) + NoLegend() +
labs(title="InferCNV label")
ggsave(str_interp("${output_dir}/infercnv_label/infercnv_label_${sample_id}.pdf"), plot=p_HE+p, w=6, h=3.5)
infercnv_obj = infercnv::run(infercnv_obj,
window_length = window_length,
analysis_mode = infercnv_mode,
cutoff=0.1, # use 1 for smart-seq, 0.1 for 10x-genomics
out_dir=str_interp("${output_dir}/${sample_id}/"), # dir is auto-created for storing outputs
cluster_by_groups=T, # cluster
sim_method=sim_method,
plot_steps=T,
denoise=T,
HMM=T,
BayesMaxPNormal = pnorm_bayes,
mask_nonDE_genes = T,
resume_mode=F,
num_threads = 8)
print(paste0("[inferCNV] Finished running for sample: ", sample_id))
拿到熱圖
拿到CNV矩陣
接下來COMMOT腳本,python版本,一樣的封裝類型,比較簡(jiǎn)單