上一篇文章[1]講解了數據的導入格式,以及使用reshape2和tidyr包進行數據轉換,本文主要利用上篇數據,進行直觀的操作演示并繪制圖形。
1. 寬數據轉為長數據
# Read the wide-format table (whitespace-separated fields, first row holds the
# column names) and preview its first rows.
# The workspace is deliberately NOT wiped with rm(list = ls()): clearing the
# global environment from inside a script destroys the caller's objects while
# leaving loaded packages and options in place, so it does not give a clean
# session anyway. Start a fresh R session if a clean state is required.
wide_line <- read.table(
  file = "C:/Users/Administrator/Desktop/wide_line.txt",
  header = TRUE, sep = ""
)
head(wide_line)
id sample date x1 x2 x3 x4
1 1 A 1 190 220 182 199
2 2 A 2 156 178 169 142
3 3 A 3 102 110 130 100
4 4 A 4 69 78 56 88
5 5 B 1 220 210 199 160
6 6 B 2 156 142 163 152
############################## Wide-data conversion ############################
# tidyr package
library(tidyr)
# Treat the date column as a categorical variable.
wide_line[["date"]] <- as.factor(wide_line[["date"]])
# Stack columns x1 through x4 into key/value pairs: the former column name is
# stored in `condition` (as a factor, because factor_key = TRUE) and the cell
# content in `measurement`.
# NOTE(review): gather() is superseded by pivot_longer() in current tidyr; it is
# kept here because the printed result below shows gather()'s row order.
wide_line_long <- gather(
  wide_line,
  key = "condition", value = "measurement",
  x1:x4,
  factor_key = TRUE
)
head(wide_line_long)
id sample date condition measurement
1 1 A 1 x1 190
2 2 A 2 x1 156
3 3 A 3 x1 102
4 4 A 4 x1 69
5 5 B 1 x1 220
6 6 B 2 x1 156
# reshape2 package
library(reshape2)
# Treat the date column as a categorical variable.
wide_line[["date"]] <- as.factor(wide_line[["date"]])
# Melt to long format: id, sample and date identify each row, x1..x4 are
# stacked. The output column names are melt()'s defaults, spelled out here so
# the resulting schema is visible at the call site.
wide_line_long <- melt(
  wide_line,
  id.vars = c("id", "sample", "date"),
  measure.vars = paste0("x", 1:4),
  variable.name = "variable",
  value.name = "value"
)
head(wide_line_long)
2. 長數據轉為寬數據
# Read the long-format table (whitespace-separated fields, first row holds the
# column names) and preview its first rows.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
long_line <- read.table(
  file = "C:/Users/Administrator/Desktop/long_line2.txt",
  header = TRUE, sep = ""
)
head(long_line)
id sample date value
1 1 A 1 190
2 2 A 1 220
3 3 A 1 182
4 4 A 1 199
5 5 A 2 156
6 6 A 2 178
############################## Long-data conversion ############################
# tidyr package
library(tidyr)
# Treat the date column as a categorical variable.
long_line[["date"]] <- as.factor(long_line[["date"]])
# Spread `value` into one column per level of `date`; the remaining columns
# (id, sample) identify the output rows.
# NOTE(review): in the printed result below every row has a value in only one
# date column and NA elsewhere, which suggests id is unique per input row.
# Confirm this is the intended layout.
long_line_wide <- spread(long_line, date, value)
head(long_line_wide)
id sample 1 2 3 4
1 1 A 190 NA NA NA
2 2 A 220 NA NA NA
3 3 A 182 NA NA NA
4 4 A 199 NA NA NA
5 5 A NA 156 NA NA
6 6 A NA 178 NA NA
# reshape2 package
library(reshape2)
# Treat the date column as a categorical variable.
long_line[["date"]] <- as.factor(long_line[["date"]])
# Cast to wide format: rows are keyed by id + sample, and each level of date
# becomes a column filled from `value`.
long_line_wide <- dcast(
  long_line,
  id + sample ~ date,
  value.var = "value"
)
head(long_line_wide)
3. 統計分析
library(Rmisc)
# Summarise `value` within each sample/date group. The result has one row per
# group with the count (N), the mean (stored under the name `value`), the
# standard deviation (sd), the standard error (se) and the confidence-interval
# half-width (ci). The 95% level is summarySE()'s default, made explicit here.
long_line_count <- summarySE(
  data = long_line,
  measurevar = "value",
  groupvars = c("sample", "date"),
  conf.interval = 0.95
)
head(long_line_count)
sample date N value sd se ci
1 A 1 4 197.75 16.378339 8.189170 26.06159
2 A 2 4 161.25 15.692355 7.846177 24.97004
3 A 3 4 110.50 13.699148 6.849574 21.79840
4 A 4 4 72.75 13.598407 6.799203 21.63810
5 B 1 4 197.25 26.272609 13.136305 41.80558
6 B 2 4 153.25 8.770215 4.385107 13.95537
4. 繪圖
library(ggplot2)
# Panel A: dodged bars of the group means, one bar per sample within each date,
# with error bars spanning mean +/- one standard deviation.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for the
# bar outline thickness. Confirm the installed version before changing it.
p1 <- ggplot(
  data = long_line_count,
  mapping = aes(x = date, y = value, group = sample, fill = sample)
) +
  geom_bar(
    stat = "identity", position = position_dodge(),
    color = "black", size = 0.3
  ) +
  geom_errorbar(
    mapping = aes(ymin = value - sd, ymax = value + sd),
    width = 0.2, position = position_dodge(0.9)
  )
# Panel B: one line per sample across dates, with points and error bars
# spanning mean +/- one standard error. All three layers share the same small
# dodge so they stay aligned with each other.
p2 <- ggplot(
  data = long_line_count,
  mapping = aes(x = date, y = value, group = sample, color = sample)
) +
  geom_line(position = position_dodge(0.2)) +
  geom_point(position = position_dodge(0.2)) +
  geom_errorbar(
    mapping = aes(ymin = value - se, ymax = value + se),
    position = position_dodge(0.2), width = 0.2
  ) +
  theme_minimal()
library(ggpubr)
# Place both panels side by side, tagged "A" and "B".
ggarrange(p1, p2, labels = c("A", "B"))