# Learning R packages
# Several important packages
# dplyr
# - mutate(): add a column; usage pattern: mutate(test, new = Sepal.Length * Sepal.Width)
# Setup for the dplyr walkthrough. The workspace is intentionally NOT cleared
# here: wiping every object of whoever runs this script is a surprising side
# effect and is not needed for the examples below.
library(dplyr)

# Small demo data set: rows 1-2, 51-52 and 101-102 of the built-in iris data.
test <- iris[c(1:2, 51:52, 101:102), ]
# View() opens a data viewer, so only call it when a user is at the console.
if (interactive()) {
  View(test)
}
colnames(test)

# mutate(): add a column computed from existing columns. Every new column
# needs a `name = expression` pair; a bare name that is not a column fails.
mutate(test, Sepal.volume = Sepal.Length * Sepal.Width)
mutate(test, Petal.volume = Petal.Length * Petal.Width)
mutate(test, new = Sepal.Length * Sepal.Width)

# select(): keep columns, chosen either by position or by name.
select(test, c(1, 3))
select(test, Sepal.Length)
# filter(.data, condition_1, condition_2, ...) returns the rows that match.
# Several conditions can be given at once, either separated by commas
# (condition1, condition2) or combined with & (condition1 & condition2).
table(test[["Species"]])
iris %>% filter(Sepal.Length > 5, Sepal.Width < 3.5)
iris %>% filter(Sepal.Length > 5, Species == "setosa")
test %>% filter(Species == "setosa")
test %>% filter(Species == "versicolor")
test %>% filter(Species == "virginica")

# filter_all(), filter_if() and filter_at() are used here on numeric columns
# only, so the non-numeric Species column is dropped first.
# NOTE(review): these scoped verbs are believed to be superseded in current
# dplyr by filter(if_all(...)) / filter(if_any(...)) - confirm before reuse.
iris_data <- select(iris, -Species)
# Rows where every attribute is below 6
filter_all(iris_data, all_vars(. < 6))
# Rows where at least one attribute is above 3
filter_all(iris_data, any_vars(. > 3))
# Rows where any attribute whose name starts with "Sep" is above 3
filter_at(iris_data, vars(starts_with("Sep")), any_vars(. > 3))
# Built-in mtcars data set: rows where at least one attribute is above 150
mtcars %>% filter_all(any_vars(. > 150))
# Rows where any attribute whose name starts with "d" is divisible by 2
mtcars %>% filter_at(vars(starts_with("d")), any_vars((. %% 2) == 0))

# Combining conditions with &, and matching against a set with %in%
test %>% filter(Species == "setosa" & Sepal.Length > 5)
test %>% filter(Species %in% c("setosa", "versicolor"))
# arrange(): sort the whole table by one column or by several columns.
test %>% arrange(Sepal.Length) # ascending order is the default
test %>% arrange(desc(Sepal.Length)) # desc() gives descending order
test %>% arrange(Sepal.Length, desc(Sepal.Width))

# summarise(): collapse the data into summary values; most useful together
# with group_by().
# Group by Species first, then compute the mean and standard deviation of
# Sepal.Length within each group.
test %>% group_by(Species)
summarise(
  group_by(test, Species),
  mean(Sepal.Length),
  sd(Sepal.Length)
)
##########################################################################
# Two practical dplyr skills
# 1. The pipe operator %>% (keyboard shortcut: cmd/ctrl + shift + M)
test %>%
  group_by(Species) %>%
  summarise(
    mean(Sepal.Length),
    sd(Sepal.Length)
  )
# 2. count(): tally the unique values of a column
test %>% count(Species)
#########################################################################
# Relational data with dplyr: combining two tables on the key column "x".
# Keep strings as character vectors rather than factors. FALSE is spelled out
# because F is an ordinary variable that can be reassigned. The global option
# is redundant on R >= 4.0.0 (where FALSE is already the default) and each
# data.frame() call below also passes the argument explicitly.
options(stringsAsFactors = FALSE)
test1 <- data.frame(
  x = c("b", "e", "f", "x"),
  z = c("A", "B", "C", "D"),
  stringsAsFactors = FALSE
)
test1
test2 <- data.frame(
  x = c("a", "b", "c", "d", "e", "f"),
  y = c(1, 2, 3, 4, 5, 6),
  stringsAsFactors = FALSE
)
test2
# 1. inner_join: the intersection of the two tables
inner_join(test1, test2, by = "x")
# 2. left_join
left_join(test1, test2, by = "x")
# 3. full_join (the result is stored in F1, not printed)
F1 <- full_join(test1, test2, by = "x")
# F2 <- full_join(test2, test1, by = "x") is not the same as F1
# 4. semi_join: returns all records of table x that can be matched in table y
semi_join(x = test1, y = test2, by = "x")
# 5. anti_join: returns the records of table x that cannot be matched in table y
anti_join(x = test2, y = test1, by = "x")
# Simple combining of tables without a key
test1 <- data.frame(
  x = c(1, 2, 3, 4),
  y = c(10, 20, 30, 40)
)
test1
test2 <- data.frame(
  x = c(5, 6),
  y = c(50, 60)
)
test2
test3 <- data.frame(
  z = c(100, 200, 300, 400)
)
test3
# bind_rows(): stack test2 underneath test1
test1 %>% bind_rows(test2)
# bind_cols(): place test3 next to test1
test1 %>% bind_cols(test3)