19.2 列表列
dataframe
- data.frame()默認將列表作為列的列表處理
> data.frame(x=list(1:3,3:5))
x.1.3 x.3.5
1 1 3
2 2 4
3 3 5
解決:使用I()
> data.frame(
+ x=I(list(1:3,3:5)),
+ y=c("1,2","3,4,5")
+ )
x y
1 1, 2, 3 1,2
2 3, 4, 5 3,4,5
tibble
- 不需要修改輸入
> tibble(
+ x=list(1:3,3:5),
+ y=c("1,2","3,4,5")
+ )
# A tibble: 2 x 2
x y
<list> <chr>
1 <int [3]> 1,2
2 <int [3]> 3,4,5
> tb <- tibble(
+ x=list(1:3,3:5),
+ y=c("1,2","3,4,5")
+ )
> tb$x
[[1]]
[1] 1 2 3
[[2]]
[1] 3 4 5
> tb$y
[1] "1,2" "3,4,5"
tribble()
- tribble()比tibble()更容易,可以自動識別想要的列表
> trb <- tribble(
+ ~x,~y,
+ 1:3,"1,2",
+ 3:5,"3,4,5"
+ )
> trb
# A tibble: 2 x 2
x y
<list> <chr>
1 <int [3]> 1,2
2 <int [3]> 3,4,5
19.3 創建列表列
19.3.1 使用嵌套:nest()
- 嵌套數據框
行 | 元觀測 |
---|---|
列 (列表列) | 組成元觀測的具體觀測 |
列(其他) | 定義觀測的變量 |
- nest() 的使用
- 用于分組數據框:保留用于分組的列,而將其他所有數據歸并到列表列中(其他所有數據歸并到列表列)
- 用于未分組數據框: 需要指定嵌套哪些列
19.3.2 使用向量化函數
> df <- tribble(
+ ~x1,
+ "a,b,c",
+ "d,e,f,g"
+ )
> str_split(df$x1,",")
[[1]]
[1] "a" "b" "c"
[[2]]
[1] "d" "e" "f" "g"
#嵌套:mutate(function()) [function()生成一個list]
> df %>%
+ mutate(x2= str_split(x1,","))
# A tibble: 2 x 2
x1 x2
<chr> <list>
1 a,b,c <chr [3]>
2 d,e,f,g <chr [4]>
# 還原嵌套:unnest()
> df %>%
+ mutate(x2= str_split(x1,",")) %>%
+ unnest()
# A tibble: 7 x 2
x1 x2
<chr> <chr>
1 a,b,c a
2 a,b,c b
3 a,b,c c
4 d,e,f,g d
5 d,e,f,g e
6 d,e,f,g f
7 d,e,f,g g
# 調用不同函數
> sim <- tribble(
+ ~f, ~params,
+ "runif", list(min=-1,max=-1),
+ "rnorm", list(sd=5),
+ "rpois", list(lambda= 10)
+ )
> sim %>%
+ mutate(sims = invoke_map(f,params,n=10))
# A tibble: 3 x 3
f params sims
<chr> <list> <list>
1 runif <named list [2]> <dbl [10]>
2 rnorm <named list [1]> <dbl [10]>
3 rpois <named list [1]> <int [10]>
19.3.3 使用多值摘要
summarize()只能使用返回單一值的摘要函數,對于返回更長向量的函數,可以將結果包裝在一個list中
> mtcars %>%
+ group_by(cyl) %>%
+ summarize(q = quantile(mpg))
`summarise()` regrouping output by 'cyl' (override with `.groups` argument)
# A tibble: 15 x 2
# Groups: cyl [3]
cyl q
<dbl> <dbl>
1 4 21.4
2 4 22.8
3 4 26
4 4 30.4
5 4 33.9
6 6 17.8
7 6 18.6
8 6 19.7
9 6 21
10 6 21.4
11 8 10.4
12 8 14.4
13 8 15.2
14 8 16.2
15 8 19.2
我也不知道為啥這里沒有報錯/(ㄒoㄒ)/~~(原因:dplyr 1.0.0 起 summarise() 允許每組返回多個值,所以不再報錯)
* 修改代碼: 將結果包裝為list
> mtcars %>%
+ group_by(cyl) %>%
+ summarize(q = list(quantile(mpg)))
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 3 x 2
cyl q
<dbl> <list>
1 4 <dbl [5]>
2 6 <dbl [5]>
3 8 <dbl [5]>
醬紫就對啦O(∩_∩)O
By the way, group_by 在不搭配其他函數使用時就沒什么用
> mtcars %>% group_by(cyl)
# A tibble: 32 x 11
# Groups: cyl [3]
mpg cyl disp hp drat wt qsec vs am gear carb
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
看起來啥也沒變,但沒有毛病就是醬紫
直接unnest()顯示結果
> mtcars %>%
+ group_by(cyl) %>%
+ summarize(q = list(quantile(mpg))) %>%
+ unnest()
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 15 x 2
cyl q
<dbl> <dbl>
1 4 21.4
2 4 22.8
3 4 26
4 4 30.4
5 4 33.9
6 6 17.8
7 6 18.6
8 6 19.7
9 6 21
10 6 21.4
11 8 10.4
12 8 14.4
13 8 15.2
14 8 16.2
15 8 19.2
Warning message:
`cols` is now required when using unnest().
Please use `cols = c(q)`
為了讓結果和比率值一同顯示(需先定義 probs <- c(0.01, 0.25, 0.5, 0.75, 0.99))
> mtcars %>%
+ group_by(cyl) %>%
+ summarise(p=list(probs),q=list(quantile(mpg,probs))) %>%
+ unnest()
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 15 x 3
cyl p q
<dbl> <dbl> <dbl>
1 4 0.01 21.4
2 4 0.25 22.8
3 4 0.5 26
4 4 0.75 30.4
5 4 0.99 33.8
6 6 0.01 17.8
7 6 0.25 18.6
8 6 0.5 19.7
9 6 0.75 21
10 6 0.99 21.4
11 8 0.01 10.4
12 8 0.25 14.4
13 8 0.5 15.2
14 8 0.75 16.2
15 8 0.99 19.1
19.3.4 使用命名列表
> x <- list(
+ a=1:5,
+ b=3:4,
+ c=5:6
+ )
> x
$a
[1] 1 2 3 4 5
$b
[1] 3 4
$c
[1] 5 6
> df <- enframe(x)
> df
# A tibble: 3 x 2
name value
<chr> <list>
1 a <int [5]>
2 b <int [2]>
3 c <int [2]>
> library(stringr)
> ?str_c
> x <- list(
+ a=1:5,
+ b=3:4,
+ c=5:6
+ )
> x
$a
[1] 1 2 3 4 5
$b
[1] 3 4
$c
[1] 5 6
> df <- enframe(x)
> df
# A tibble: 3 x 2
name value
<chr> <list>
1 a <int [5]>
2 b <int [2]>
3 c <int [2]>
> library(stringr)
> df %>%
+ mutate(
+ smry= map2_chr(
+ name,
+ value,
+ ~str_c(.x,":",y[1])
+ )
+ )
Error: Problem with `mutate()` input `smry`.
x object 'y' not found
i Input `smry` is `map2_chr(name, value, ~str_c(.x, ":", y[1]))`.
Run `rlang::last_error()` to see where the error occurred.
> df
# A tibble: 3 x 2
name value
<chr> <list>
1 a <int [5]>
2 b <int [2]>
3 c <int [2]>
> df %>%
+ mutate(
+ smry= map2_chr(
+ name,
+ value,
+ ~str_c(.x,":",.y[1])
+ )
+ )
# A tibble: 3 x 3
name value smry
<chr> <list> <chr>
1 a <int [5]> a:1
2 b <int [2]> b:3
3 c <int [2]> c:5
注意:str_c(.x,":",.y[1]) 中 .y 前面的 . 漏掉會報錯
異構列表篩選必備!!!(。^▽^)
- 根據類型篩選
> df %>%
+ mutate(
+ smry= map2_chr(
+ name,
+ value,
+ ~str_c(.x,":",y[1])
+ )
+ )
Error: Problem with `mutate()` input `smry`.
x object 'y' not found
i Input `smry` is `map2_chr(name, value, ~str_c(.x, ":", y[1]))`.
19.4 簡化列表列
> df <- tribble(
+ ~x,
+ letters[1:5],
+ 1:3,
+ runif(5)
+ )
> df
# A tibble: 3 x 1
x
<list>
1 <chr [5]>
2 <int [3]>
3 <dbl [5]>
> df %>%
+ mutate(
+ type= map_chr(x,typeof),
+ length= map_int(x,length)
+ )
# A tibble: 3 x 3
x type length
<list> <chr> <int>
1 <chr [5]> character 5
2 <int [3]> integer 3
3 <dbl [5]> double 5
- 從列表列x的所有元素中提取指定變量中的內容
.null=NA_real_ 可以在指定變量不存在時提供一個缺失值作為返回值
df <- tribble(
~x,
list(a=1,b=2),
list(a=2,c=4)
)
> df2 %>%
+ mutate(
+ a= map_dbl(x,"a"),
+ b= map_dbl(x,"b",.null=NA_real_)
+ )
# A tibble: 2 x 3
x a b
<list> <dbl> <dbl>
1 <named list [2]> 1 2
2 <named list [2]> 2 NA
19.4.2 嵌套還原
> tibble(
+ x=1:2,
+ y=list(1:4,1)
+ )
# A tibble: 2 x 2
x y
<int> <list>
1 1 <int [4]>
2 2 <dbl [1]>
> tibble(
+ x=1:2,
+ y=list(1:4,1)
+ ) %>%
+ unnest()
# A tibble: 5 x 2
x y
<int> <dbl>
1 1 1
2 1 2
3 1 3
4 1 4
5 2 1
Warning message:
`cols` is now required when using unnest().
Please use `cols = c(y)`
第二行只重復了一次,這意味著:
- 不能同時還原包含不同數量元素的兩個列表列
df1 <- tribble(
~x,~y,~z,
1,c("a","b"),1:2,
2,"c",3
)
df1
df1 %>% unnest()
y和z每行中元素數量相等,可以正常運行
> df1 <- tribble(
+ ~x,~y,~z,
+ 1,c("a","b"),1:2,
+ 2,c("b","c"),3
+ )
> df1
# A tibble: 2 x 3
x y z
<dbl> <list> <list>
1 1 <chr [2]> <int [2]>
2 2 <chr [2]> <dbl [1]>
> df1 %>% unnest()
# A tibble: 4 x 3
x y z
<dbl> <chr> <dbl>
1 1 a 1
2 1 b 2
3 2 b 3
4 2 c 3
Warning message:
`cols` is now required when using unnest().
Please use `cols = c(y, z)`
(lll¬ω¬)書上這里應該跑不出來……可能更新后level up 了,應該x、y、z元素數量相同就可以