AverageExpression
gzh:BBio
Seurat中用于計算cluster基因平均表達(dá)值的函數(shù)搔体,為啥這個結(jié)果和FindMarkers中差異倍數(shù)avg_logFC有出入呢本昏?
#計算每個cluster中基因的平均表達(dá)
df <- AverageExpression(pbmc, verbose=F)$RNAhead(df)
#0 1 2
# MS4A1 0.000000 2.083443 171.6152
# CD79B 10.814657 17.548842 152.1344
# CD79A 0.000000 11.618333 215.0869
# HLA-DRA 37.105857 405.850522 1158.0852
# TCL1A 0.000000 3.463203 142.0748
# HLA-DQB1 3.968254 45.353183 169.2762
AverageExpression源碼
getAnywhere('AverageExpression')
# fxn.average <- switch(EXPR = slot, data = function(x) {
# rowMeans(x = expm1(x = x))
# }, rowMeans)# for (j in levels(x = Idents(object))) {
# temp.cells <- WhichCells(object = object, idents = j)
# features.assay <- unique(x = intersect(x = features.assay,
# y = rownames(x = data.use)))
# if (length(x = temp.cells) == 1) {
# data.temp <- (data.use[features.assay, temp.cells])
# if (slot == "data") {
# data.temp <- expm1(x = data.temp)
# }
# }
# if (length(x = temp.cells) > 1) {
# data.temp <- fxn.average(data.use[features.assay,
# temp.cells, drop = FALSE])
# }
# data.all[[j]] <- data.temp
# if (verbose) {
# message(paste("Finished averaging", assays[i],
# "for cluster", j))
# }
# if (i == 1) {
# ident.new <- c(ident.new, as.character(x = ident.orig[temp.cells[1]]))
# }
# }
從源碼可以看出,對數(shù)據(jù)中的cluster依次進(jìn)行基因平均表達(dá)值的計算庭呜, rowMeans(x = expm1(x = x))表明平均表達(dá)值為data中數(shù)據(jù)轉(zhuǎn)指數(shù)形式后減1的平均值忌堂,并不是簡單的取data數(shù)據(jù)的平均值露戒,實際上就是NormalizeData中l(wèi)og1p的逆步驟护蝶。
FindMarkers源碼
getAnywhere('FindMarkers.default')# mean.fxn <- if (is.null(x = reduction) && slot != "scale.data") {
# switch(EXPR = slot, data = function(x) {
# return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use))
# }, function(x) {
# return(log(x = rowMeans(x = x) + pseudocount.use))
# })
# }
# else {
# rowMeans# }
# data.1 <- mean.fxn(data[features, cells.1, drop = FALSE])
# data.2 <- mean.fxn(data[features, cells.2, drop = FALSE])
# total.diff <- (data.1 - data.2)
從源碼看出avg_logFC的計算過程先計算平均表達(dá)值,加1再取log對數(shù)后兩組值相減的結(jié)果心墅。pseudocount.use默認(rèn)值為1酿矢。
LYZ基因示例
AverageExpression(pbmc_small, features = 'LYZ')
#0 1 2
#LYZ 44.31667 987.141 262.0951
FindMarkers(pbmc_small, features = 'LYZ',ident.1 = 0, ident.2 = 1)
#p_val avg_logFC pct.1 pct.2 p_val_adj
#LYZ 6.997602e-11 -3.08215 0.417 1 1.609449e-08
log((44.31667+1)/(987.141+1))
#-3.08215
馬克marker
#T細(xì)胞
FeaturePlot(object = pbmc_small, features = c('CD3D', 'CD8A', 'IL7R'),ncol=3)