一、初步認識tidyr包
1.首先安裝tidyr包
install.packages("tidyr")
2.初步了解tidyr包的知識
必應搜索tidyr,找到相關介紹tidyr包的知識(https://tidyr.tidyverse.org/, https://www.r-bloggers.com/data-manipulation-with-tidyr/)
tidyr包中主要涉及:
1)缺失值的簡單補齊
2)長形表變寬形表與寬形表變長形表
3)列分割與列合并
二、初步應用tidyr包
1.缺失值的簡單補齊
> library(tidyr)#加載包
> library(dplyr)#加載包
載入程輯包:‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
> #創建含有缺失值的數據框示例
> x <- c(1,2,7,8,NA,10,22,NA,15)
> y <-c('a',NA,'b',NA,'b','a','a','b','a')
> df <- data.frame(x = x, y = y)
> df
x y
1 1 a
2 2 <NA>
3 7 b
4 8 <NA>
5 NA b
6 10 a
7 22 a
8 NA b
9 15 a
> drop_na(df)
x y
1 1 a
3 7 b
6 10 a
7 22 a
9 15 a
> x <- c(1,2,7,8,NA,10,22,NA,15)
> y <-c('a',NA,'b',NA,'b','a','a','b','a')
> df <- data.frame(x = x, y = y)
> df
x y
1 1 a
2 2 <NA>
3 7 b
4 8 <NA>
5 NA b
6 10 a
7 22 a
8 NA b
9 15 a
> fill(df)#未指定列時不會填充任何值(需指定列才會根據上一行的數值填充)
x y
1 1 a
2 2 <NA>
3 7 b
4 8 <NA>
5 NA b
6 10 a
7 22 a
8 NA b
9 15 a
> fill(df,c(x,y))#根據上一行的數值填充
x y
1 1 a
2 2 a
3 7 b
4 8 b
5 8 b
6 10 a
7 22 a
8 22 b
9 15 a
> replace_na(df,list(x=2,y=b))#空值填進去特定的一個數值
Error in is_list(replace) : object 'b' not found
> replace_na(df,list(c(x=2,y=b))#空值填進去特定的一個數值
+ )
Error: unexpected input in:
"replace_na(df,list(c(x=2,y=b))#空值填進去特定的一個數值
?
> replace_na(df,list(c(x=2,y=b)))#空值填進去特定的一個數值
Error in is_list(replace) : object 'b' not found
> replace_na(df,list(x=2)#空值填進去特定的一個數值
+ )
x y
1 1 a
2 2 <NA>
3 7 b
4 8 <NA>
5 2 b
6 10 a
7 22 a
8 2 b
9 15 a
> replace_na(df,list(y=b))#空值填進去特定的一個數值
Error in is_list(replace) : object 'b' not found
> replace_na(df,list(y="b"))#空值填進去特定的一個數值
x y
1 1 a
2 2 b
3 7 b
4 8 b
5 NA b
6 10 a
7 22 a
8 NA b
9 15 a
> x <- c(1,2,7,8,NA,10,22,NA,15)
> y <-c('a',NA,'b',NA,'b','a','a','b','a')
> df <- data.frame(x = x, y = y)
> replace_na(df,list(x=2,y="b"))#空值填進去特定的一個數值
x y
1 1 a
2 2 b
3 7 b
4 8 b
5 2 b
6 10 a
7 22 a
8 2 b
9 15 a
> x <- c(1,2,7,8,NA,10,22,NA,15)
> y <-c('a',NA,'b',NA,'b','a','a','b','a')
> df <- data.frame(x = x, y = y)
> df
x y
1 1 a
2 2 <NA>
3 7 b
4 8 <NA>
5 NA b
6 10 a
7 22 a
8 NA b
9 15 a
> #計算x的均值和中位數
> x_mean <- mean(df$x, na.rm = TRUE)
> x_mean
[1] 9.285714
> #計算y的眾數
> y_mode <- names(which.max(table(df$y)))
> y_mode
[1] "a"
> table(df$y)
a b
4 3
> which.max(table(df$y))
a
1
> f$y[which.max(table(df$y))]
Error: object 'f' not found
> df$y[which.max(table(df$y))]
[1] a
Levels: a b
> #替換數據框df中x和y的缺失值
> df2 <- replace_na(data = df, replace = list(x = x_mean, y = y_mode))
> df2
x y
1 1.000000 a
2 2.000000 a
3 7.000000 b
4 8.000000 a
5 9.285714 b
6 10.000000 a
7 22.000000 a
8 9.285714 b
9 15.000000 a
2.長形表變寬形表與寬形表變長形表
> #長形表
> name <- c('A','A','A','B','B')
> product <- c('P1','P2','P3','P1','P4')
> price <- c(100,130,55,100,78)
> df_long <- data.frame(name = name, product = product, price = price)
> df_long
name product price
1 A P1 100
2 A P2 130
3 A P3 55
4 B P1 100
5 B P4 78
> #寬形表
> name <- c('A','B','C')
> gender <- c('f','f','m')
> province <- c('JS','SH','HN')
> age <- c(18,22,19)
> df_wide <- data.frame(name = name, gender = gender, province = province, age = age)
> df_wide
name gender province age
1 A f JS 18
2 B f SH 22
3 C m HN 19
> #使用spread()函數實現長表轉寬表,語法如下
> #spread(data, key, value, fill = NA, convert = FALSE, drop = TRUE)
> #data:為需要轉換的長形表
> #key:需要將變量值拓展為字段的變量
> #value:需要分散的值
> #fill:對于缺失值，可將fill的值賦值給被轉型后的缺失值
> df_long_expand <- spread(data = df_long, key = product, value = price)
> df_long_expand
name P1 P2 P3 P4
1 A 100 130 55 NA
2 B 100 NA NA 78
> #使用gather()函數實現寬表轉長表，語法如下:
> #gather(data, key, value, ..., na.rm = FALSE, convert = FALSE)
> #data:需要被轉換的寬形表
> #key:將原數據框中的所有列賦給一個新變量key
> #value:將原數據框中的所有值賦給一個新變量value
> #...:可以指定哪些列聚到一列中
> #na.rm:是否刪除缺失值
> df_wide_gather <- gather(data = df_wide, key = variable, value = value)
Warning message:
attributes are not identical across measure variables;
they will be dropped
> df_wide_gather
variable value
1 name A
2 name B
3 name C
4 gender f
5 gender f
6 gender m
7 province JS
8 province SH
9 province HN
10 age 18
11 age 22
12 age 19
3.列分割與列合并
> id <- c(1,2)
> datetime <- c(as.POSIXlt('2015-12-31 13:23:44'), as.POSIXlt('2016-01-28 21:14:12'))
> df <- data.frame(id = id, datetime = datetime)
> df
id datetime
1 1 2015-12-31 13:23:44
2 2 2016-01-28 21:14:12
> #將日期時間數據切割為日期和時間兩列
> separate <- df %>% separate(., col = datetime, into = c('date', 'time'), sep = ' ', remove = FALSE)
> separate
id datetime date time
1 1 2015-12-31 13:23:44 2015-12-31 13:23:44
2 2 2016-01-28 21:14:12 2016-01-28 21:14:12