一鍵完成單因素方差分析及可視化的R腳本

library(optparse)
library(tidyverse)
library(agricolae)
library(reshape2)
library(ggplot2)
library(ggpubr)
# Shared ggplot2 theme applied to every figure produced by this script:
# no grid lines, bold black axis titles/text, and no legend.
mytheme <- theme(
  # Legend: hidden entirely; the text style only matters if it is re-enabled.
  legend.position = "none",
  legend.text = element_text(face = "bold", size = 15),
  # Title is nudged down/right relative to its default placement.
  plot.title = element_text(hjust = 0.1, vjust = -8.5),
  # Axis titles.
  axis.title.x = element_text(colour = "black", face = "bold", size = 24),
  axis.title.y = element_text(colour = "black", face = "bold", size = 20),
  # Axis tick labels: the per-axis entries refine the generic `axis.text`.
  axis.text = element_text(face = "bold", size = 20),
  axis.text.x = element_text(size = 14, colour = "black"),
  axis.text.y = element_text(size = 14, colour = "black"),
  # No grid lines.
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank()
)
# Command-line interface. Every option is a character option whose default
# is FALSE, so an omitted option does not come through as NULL.
string_option <- function(flags, help_text) {
  make_option(flags, type = "character", default = FALSE, help = help_text)
}

option_list <- list(
  string_option(c("-f", "--file"), "The input file"),
  string_option(
    c("-d", "--depvar"),
    "The column name of the dependent variable"
  ),
  string_option(
    c("-i", "--indepvar"),
    "The column name of the independent variable"
  ),
  string_option(c("-t", "--type"), "Type of drawing: boxplot or barplot"),
  string_option(c("-o", "--out"), "the out put file name")
)

parser <- OptionParser(
  option_list = option_list,
  usage = "This Script is use for Analysis of variance and plotting"
)
opt <- parse_args(parser)

# Argument check: only the two supported plot types are accepted.
if (!(opt$type == "boxplot" || opt$type == "barplot")) {
  type_error <- "Please input the right type of drawing:boxplot or barplot!"
  print(type_error)
  stop(type_error)
}

# ---- Load data and test the ANOVA assumptions ------------------------------
# Produces the objects used by the plotting section below:
#   out_name  output file name (<--out>.pdf)
#   df        the raw input table
#   ss        two-column data frame: `count` (dependent variable) and
#             `group` (independent variable, as a factor)
#   p1, p2    p-values of the Shapiro-Wilk and Bartlett tests
#   model     one-way ANOVA fit of count ~ group
out_name <- paste(opt$out, "pdf", sep = ".")

# Expected layout of the tab-separated input: the first column is the sample
# ID, the second column is the grouping information, and all remaining
# columns are data columns.
df <- read.table(opt$file, sep = "\t", header = TRUE)
colname_list <- colnames(df)

if (!(opt$depvar %in% colname_list)) {
  print("Please input the correct column name of the dependent variable")
  print(colname_list)
  stop("Please input the correct column name of the dependent variable")
}
# The independent variable needs the same check: an unknown name would give
# an empty index and fail later with an unrelated-looking error.
if (!(opt$indepvar %in% colname_list)) {
  print("Please input the correct column name of the independent variable")
  print(colname_list)
  stop("Please input the correct column name of the independent variable")
}

depvar_index <- which(colname_list == opt$depvar)
indepvar_index <- which(colname_list == opt$indepvar)

ss <- df[depvar_index]
colnames(ss) <- c("count")
if (!is.numeric(ss$count)) {
  stop("The dependent variable column must be numeric", call. = FALSE)
}
# Force a categorical predictor: numeric group codes (1, 2, 3, ...) would
# otherwise be fitted by aov() as a continuous regressor.
ss$group <- as.factor(df[, indepvar_index])

# Normality check (Shapiro-Wilk normality test); keep the p-value.
normality <- shapiro.test(ss$count)
p1 <- normality$p.value

# Homogeneity of variance check (Bartlett test); keep the p-value.
homo <- bartlett.test(count ~ group, data = ss)
p2 <- homo$p.value

model <- aov(count ~ group, data = ss)
###繪制柱狀圖


# ---- Plotting ---------------------------------------------------------------
# Relies on objects created earlier in the script: `opt`, `ss` (columns
# `count` and `group`), `model`, `p1`, `p2`, `out_name` and `mytheme`.
#
# Decision table (unchanged from the original eight copy-pasted branches):
#   both assumption tests pass (p > 0.05) -> ANOVA p-value is used
#   otherwise                             -> Kruskal-Wallis p-value is used
#   bar plot: LSD letters are drawn only for a significant ANOVA
#   box plot: LSD letters for a significant ANOVA; pairwise comparison
#             brackets for a significant Kruskal-Wallis test
#
# NOTE(review): the axis labels look swapped (`y = "group"` while the groups
# are on the x axis). They are kept byte-for-byte here; confirm the intent.

# Per-group mean, SD and mean +/- SD error-bar limits for the bar chart.
summarise_groups <- function(values, groups) {
  group_mean <- tapply(values, groups, mean, na.rm = TRUE)
  group_sd <- tapply(values, groups, sd, na.rm = TRUE)
  stats <- data.frame(
    group = names(group_mean),
    mean = as.numeric(group_mean),
    SD = as.numeric(group_sd),
    stringsAsFactors = FALSE
  )
  # Groups without any usable observation cannot be drawn.
  stats <- stats[!is.na(stats$mean), , drop = FALSE]
  stats$ymin <- stats$mean - stats$SD
  stats$ymax <- stats$mean + stats$SD
  stats
}

# Significance letters from an unadjusted LSD test, one per entry of `groups`.
lsd_letters <- function(fit, groups) {
  lsd <- LSD.test(fit, "group", p.adj = "none")
  letter_by_group <- setNames(
    as.character(lsd$groups$groups),
    rownames(lsd$groups)
  )
  unname(letter_by_group[as.character(groups)])
}

# Tilt the x tick labels when there are more than three groups, then save.
save_plot <- function(p, groups) {
  if (length(unique(groups)) > 3) {
    p <- p + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
  }
  ggsave(out_name, p, width = 8.3, height = 5.8)
}

name_i <- opt$depvar
p1_label <- round(p1, 3)
p2_label <- round(p2, 3)

# Title: the two assumption-test p-values, optionally followed by the name
# and p-value of the group test.
make_title <- function(extra = NULL) {
  paste(
    c("Normality test", p1_label, "Homogeneity of variance", p2_label, extra),
    collapse = ":"
  )
}

# A plain if/else so that a p-value of exactly 0.05 no longer falls between
# the two branches and silently produces no figure.
parametric <- isTRUE(p1 > 0.05 && p2 > 0.05)
if (parametric) {
  test_p <- summary(model)[[1]][["Pr(>F)"]][1]
} else {
  krusk <- compare_means(count ~ group, data = ss, method = "kruskal.test")
  # Third column of the compare_means() result, as in the original code
  # (presumably `p.adj` -- TODO confirm against the installed ggpubr).
  test_p <- as.data.frame(krusk)[[3]][1]
}
significant <- isTRUE(test_p < 0.05)
show_letters <- parametric && significant

if (opt$type == "barplot") {
  bar_stats <- summarise_groups(ss$count, ss$group)
  if (show_letters) {
    bar_stats$groups <- lsd_letters(model, bar_stats$group)
  }
  # Leave head-room above the tallest error bar; scaling by the mean alone
  # makes ggplot2 drop error bars/letters that exceed the axis limit.
  y_upper <- 1.2 * max(c(bar_stats$mean, bar_stats$ymax), na.rm = TRUE)

  if (show_letters) {
    title_extra <- NULL
  } else if (parametric) {
    title_extra <- c("aov", round(test_p, 3))
  } else {
    title_extra <- c("kruskal.test", test_p)
  }

  p <- ggplot(bar_stats, aes(x = group, y = mean, colour = group)) +
    geom_bar(
      aes(colour = group, fill = group),
      stat = "identity", width = 0.4, position = "dodge"
    )
  if (show_letters) {
    # vjust/size are constants, so they belong outside aes().
    p <- p + geom_text(aes(label = groups, y = ymax), vjust = -0.3, size = 6)
  }
  p <- p +
    geom_errorbar(
      aes(ymin = ymin, ymax = ymax),
      colour = "black", width = 0.1, size = 1
    ) +
    scale_y_continuous(expand = c(0, 0), limits = c(0, y_upper)) +
    labs(
      x = paste(name_i, "of all group", sep = "_"),
      y = "group",
      title = make_title(title_extra)
    ) +
    mytheme
  save_plot(p, bar_stats$group)
} else if (opt$type == "boxplot") {
  # as.character() keeps the x axis discrete even for numeric group codes.
  data_box <- data.frame(
    group = as.character(ss$group),
    dd = ss$count,
    stringsAsFactors = FALSE
  )

  if (show_letters) {
    x_label <- paste(name_i, " group", sep = "_")
    title_extra <- NULL
  } else if (parametric) {
    x_label <- paste(name_i, "box", sep = "_")
    title_extra <- c("aov", round(test_p, 3))
  } else if (significant) {
    x_label <- paste(name_i, "of all group", sep = "_")
    title_extra <- NULL
  } else {
    x_label <- paste(name_i, "box", sep = "_")
    # This p-value comes from the Kruskal-Wallis test, not from aov.
    title_extra <- c("kruskal.test", round(test_p, 3))
  }

  p <- ggplot(data_box, aes(x = group, y = dd, color = group)) +
    geom_boxplot(
      alpha = 1, outlier.size = 0, size = 0.7, width = 0.5,
      fill = "transparent"
    ) +
    labs(x = x_label, y = "group", title = make_title(title_extra))

  if (show_letters) {
    # One letter per group, placed 5% of the overall data range above the
    # group's maximum.
    group_max <- tapply(data_box$dd, data_box$group, max, na.rm = TRUE)
    value_range <- diff(range(data_box$dd, na.rm = TRUE))
    label_df <- data.frame(
      group = names(group_max),
      y = as.numeric(group_max) + value_range * 0.05,
      stringsAsFactors = FALSE
    )
    label_df$stat <- lsd_letters(model, label_df$group)
    p <- p + geom_text(
      data = label_df,
      aes(x = group, y = y, color = group, label = stat)
    )
  }

  p <- p +
    geom_jitter(position = position_jitter(0.17), size = 1, alpha = 0.7) +
    theme(legend.position = "none")

  if (!parametric && significant) {
    # Every pair of groups, taken from the plotted data rather than from a
    # column that must literally be called `group` in the input file.
    my_comparisons <- combn(sort(unique(data_box$group)), 2, simplify = FALSE)
    p <- p +
      stat_compare_means() +
      stat_compare_means(
        comparisons = my_comparisons, label = "p.signif", hide.ns = FALSE
      )
  }

  p <- p + mytheme
  save_plot(p, data_box$group)
}
    

腳本有五個參數

-f:輸入的數據，第一列是樣本名，第二列是自變量也就是分組信息，第三列至以后就是因變量，就是分組效應

-d:因變量的列名

-i:自變量的列名

-t:選擇可視化的類型，箱線圖或者條形圖

-o:輸出文件名稱的前綴

使用示例:

Rscript aov.R -f input2.txt -d response -i trt -t boxplot -o 123
image.png
©著作權歸作者所有,轉載或內容合作請聯系作者
  • 序言:七十年代末钱床,一起剝皮案震驚了整個濱河市,隨后出現(xiàn)的幾起案子埠居,更是在濱河造成了極大的恐慌查牌,老刑警劉巖,帶你破解...
    沈念sama閱讀 206,968評論 6 482
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件滥壕,死亡現(xiàn)場離奇詭異纸颜,居然都是意外死亡,警方通過查閱死者的電腦和手機绎橘,發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 88,601評論 2 382
  • 文/潘曉璐 我一進店門胁孙,熙熙樓的掌柜王于貴愁眉苦臉地迎上來,“玉大人称鳞,你說我怎么就攤上這事涮较。” “怎么了冈止?”我有些...
    開封第一講書人閱讀 153,220評論 0 344
  • 文/不壞的土叔 我叫張陵狂票,是天一觀的道長。 經(jīng)常有香客問我熙暴,道長闺属,這世上最難降的妖魔是什么慌盯? 我笑而不...
    開封第一講書人閱讀 55,416評論 1 279
  • 正文 為了忘掉前任,我火速辦了婚禮掂器,結果婚禮上亚皂,老公的妹妹穿的比我還像新娘。我一直安慰自己国瓮,他們只是感情好孕讳,可當我...
    茶點故事閱讀 64,425評論 5 374
  • 文/花漫 我一把揭開白布。 她就那樣靜靜地躺著巍膘,像睡著了一般厂财。 火紅的嫁衣襯著肌膚如雪。 梳的紋絲不亂的頭發(fā)上峡懈,一...
    開封第一講書人閱讀 49,144評論 1 285
  • 那天璃饱,我揣著相機與錄音,去河邊找鬼肪康。 笑死荚恶,一個胖子當著我的面吹牛,可吹牛的內(nèi)容都是我干的磷支。 我是一名探鬼主播谒撼,決...
    沈念sama閱讀 38,432評論 3 401
  • 文/蒼蘭香墨 我猛地睜開眼,長吁一口氣:“原來是場噩夢啊……” “哼雾狈!你這毒婦竟也來了廓潜?” 一聲冷哼從身側(cè)響起,我...
    開封第一講書人閱讀 37,088評論 0 261
  • 序言:老撾萬榮一對情侶失蹤善榛,失蹤者是張志新(化名)和其女友劉穎辩蛋,沒想到半個月后,有當?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體移盆,經(jīng)...
    沈念sama閱讀 43,586評論 1 300
  • 正文 獨居荒郊野嶺守林人離奇死亡悼院,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
    茶點故事閱讀 36,028評論 2 325
  • 正文 我和宋清朗相戀三年,在試婚紗的時候發(fā)現(xiàn)自己被綠了咒循。 大學時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片据途。...
    茶點故事閱讀 38,137評論 1 334
  • 序言:一個原本活蹦亂跳的男人離奇死亡,死狀恐怖叙甸,靈堂內(nèi)的尸體忽然破棺而出颖医,到底是詐尸還是另有隱情,我是刑警寧澤蚁署,帶...
    沈念sama閱讀 33,783評論 4 324
  • 正文 年R本政府宣布便脊,位于F島的核電站蚂四,受9級特大地震影響光戈,放射性物質(zhì)發(fā)生泄漏哪痰。R本人自食惡果不足惜,卻給世界環(huán)境...
    茶點故事閱讀 39,343評論 3 307
  • 文/蒙蒙 一久妆、第九天 我趴在偏房一處隱蔽的房頂上張望晌杰。 院中可真熱鬧,春花似錦筷弦、人聲如沸肋演。這莊子的主人今日做“春日...
    開封第一講書人閱讀 30,333評論 0 19
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽爹殊。三九已至,卻和暖如春奸绷,著一層夾襖步出監(jiān)牢的瞬間梗夸,已是汗流浹背。 一陣腳步聲響...
    開封第一講書人閱讀 31,559評論 1 262
  • 我被黑心中介騙來泰國打工号醉, 沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留反症,地道東北人。 一個月前我還...
    沈念sama閱讀 45,595評論 2 355
  • 正文 我出身青樓畔派,卻偏偏與公主長得像铅碍,于是被迫代替她去往敵國和親。 傳聞我的和親對象是個殘疾皇子线椰,可洞房花燭夜當晚...
    茶點故事閱讀 42,901評論 2 345

推薦閱讀更多精彩內(nèi)容