# Setup: make the flights data and the dplyr verbs available, then take a
# first look at the data.

# Install nycflights13 only when it is missing, so that re-running the
# script does not reinstall the package every time.
if (!requireNamespace("nycflights13", quietly = TRUE)) {
  install.packages("nycflights13")
}
library(nycflights13)
library(tidyverse)

# Help search for the package and the help page of the data set.
??nycflights13
?flights

# Inspect the data: print it, open it in the viewer, check its class.
flights
View(flights)
class(flights)

# Rows whose arr_delay is at least 120. This has to run after dplyr is
# attached; before that, `filter` resolves to stats::filter().
filter(flights, arr_delay >= 120)
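# Variable types
# int: integer; dbl: double-precision floating point (real numbers); dttm: date + time; lgl: logical (only TRUE and FALSE); fctr: factor; date: date
# filter: pick rows
# arrange: sort rows
# select: pick columns
# mutate: create new variables from existing ones
# summarize: summary statistics
# Filtering rows with filter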
# Rows with month == 1 and day == 1; the result is printed, not stored.
filter(
  flights,
  month == 1,
  day == 1
)

# Store the same rows in `jan1`, then print them.
jan1 <- filter(
  flights,
  month == 1,
  day == 1
)
jan1

# Wrapping the assignment in parentheses stores the rows with month == 12
# and day == 25 in `dec25` and prints them in one step; the bare name
# afterwards prints them once more.
(
  dec25 <- filter(
    flights,
    month == 12,
    day == 25
  )
)
dec25
# Comparison operators
# >, >=, <, <=, !=, ==
# Rows where month is 11 or 12, written with an explicit `|`.
filter(
  flights,
  month == 11 | month == 12
)

# The same rows stored in `a` and opened in the data viewer.
a <- filter(
  flights,
  month == 11 | month == 12
)
View(a)

# The same condition written as set membership.
filter(
  flights,
  month %in% c(11, 12)
)

# A small tibble with one missing value: `x > 1` alone does not keep the
# NA row, so the second call asks for it explicitly with is.na().
df <- tibble(x = c(1, NA, 3))
filter(
  df,
  x > 1
)
filter(
  df,
  is.na(x) | x > 1
)
# Sort by year, then month, then day.
arrange(
  flights,
  year, month, day
)

# Sort by dep_delay in descending order.
arrange(
  flights,
  desc(dep_delay)
)
# Missing values are placed last, whether the sort is ascending or descending.
# A small tibble with one NA, sorted ascending and then descending.
df <- tibble(x = c(5, 2, NA))
arrange(
  df,
  x
)
arrange(
  df,
  desc(x)
)

# Sort on is.na(dep_time) in descending order first (TRUE before FALSE),
# so rows with a missing dep_time lead; the rest follow by dep_time.
arrange(
  flights,
  desc(is.na(dep_time)),
  dep_time
)
# select: pick columns
# Pick individual columns by name.
select(
  flights,
  year, month, day
)

# Pick a contiguous range of columns, from year through day.
select(
  flights,
  year:day
)

# Range plus negation: every column except year through day.
select(
  flights,
  -(year:day)
)
# Selection helpers. They only work inside a selecting verb such as
# select(); called on their own at top level they raise an error, so each
# one is wrapped in select(flights, ...) here. The pattern strings are
# placeholders that only illustrate the syntax.
select(flights, starts_with("abc"))   # names that begin with "abc"
select(flights, ends_with("eyz"))     # names that end with "eyz"
select(flights, contains("ijk"))      # names that contain "ijk"

# Names that match a regular expression. The backslash has to be doubled
# inside an R string: "\1" is the control character U+0001, whereas "\\1"
# reaches the regex engine as the backreference \1 (a repeated character).
select(flights, matches("(.)\\1"))

select(flights, num_range("x", 1:3))  # matches x1, x2, x3
# Rename a column: tailnum becomes tail_num.
rename(
  flights,
  tail_num = tailnum
)

# Move time_hour and air_time to the front; everything() stands for the
# remaining columns.
select(
  flights,
  time_hour, air_time,
  everything()
)
# Exercises
# Column names held in a character vector.
vars <- c("year", "month", "day", "dep_delay", "arr_delay")

# Select the columns named in `vars`. all_of() replaces the superseded
# one_of(); it fails with an error if a listed name is not a column,
# where one_of() only warned.
select(flights, all_of(vars))
# vars is a character vector. Written this way, select() does not need each column name spelled out.
# contains() ignores case by default; ignore.case = FALSE makes the match
# on "YEAR" case-sensitive.
select(
  flights,
  contains("YEAR", ignore.case = FALSE)
)
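# 5. mutate(): create new columns
# mutate() appends the new columns to the end of the data frame and returns a new data frame. transmute() keeps only the newly created columns, i.e. the results computed from the existing columns (by arithmetic and so on).
# Create gain and speed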
# Narrow flights to year through day, the columns whose names end in
# "delay", plus distance and air_time; then print the smaller table.
flights_sml <- select(
  flights,
  year:day, ends_with("delay"), distance, air_time
)
flights_sml

# Add two derived columns:
#   gain  = dep_delay - arr_delay
#   speed = distance / air_time * 60
mutate(
  flights_sml,
  gain = dep_delay - arr_delay,
  speed = distance / air_time * 60
)
# A new column can be used right away to build another new column.
# gain and hours are created first; gain_per_hour is then computed from
# those two new columns within the same call.
mutate(
  flights_sml,
  gain = dep_delay - arr_delay,
  hours = air_time / 60,
  gain_per_hour = gain / hours
)
# transmute() keeps only the new columns
# Same three derived columns as above, computed from the full flights
# table; transmute() returns only gain, hours and gain_per_hour.
transmute(
  flights,
  gain = dep_delay - arr_delay,
  hours = air_time / 60,
  gain_per_hour = gain / hours
)
# summarise(): grouped summaries.
# (Section heading only: a bare summarise() call has no data frame to
# work on and fails.)

# Mean of dep_delay over the whole table; na.rm = TRUE drops missing
# values before averaging.
summarise(flights, delay = mean(dep_delay, na.rm = TRUE))
# group_by() makes summarise() much more useful.
# Group the flights by year, month and day, then compute the mean
# dep_delay (missing values removed) within each group.
by_day <- group_by(
  flights,
  year, month, day
)
summarise(
  by_day,
  delay = mean(dep_delay, na.rm = TRUE)
)
# Pipes
# Pipes simplify the code: the data frame name does not have to be written over and over.
# Level 1: create an intermediate object
# Two separate statements joined by a named intermediate object: the
# grouped table is stored in `by_day` and then summarised.
by_day <- group_by(
  flights,
  year, month, day
)
summarise(
  by_day,
  delay = mean(dep_delay, na.rm = TRUE)
)
# Level 2: nest group_by() inside summarise()
# No intermediate object: the grouped table is passed straight in as the
# first argument of summarise().
summarise(
  group_by(flights, year, month, day),
  delay = mean(dep_delay, na.rm = TRUE)
)
# Level 3: a pipe links the two steps; the data frame name is still inside the parentheses
# The grouping call still names the data frame itself; its result is
# piped into summarise().
group_by(flights, year, month, day) %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))
# Level 4: start the pipe from the data frame itself
# Full pipeline, one step per line: data frame, grouping, summary.
flights %>%
  group_by(year, month, day) %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))
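# (2) Missing values
# If the data contain NA, the computed result is NA as well, so the code needs na.rm = TRUE, which removes missing values before the computation.
# (Counts)
# Counts
# Example: find the planes with the longest average delay
# (Grouping by tailnum gives the full-year data for each individual plane)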
# Group the flights by carrier; the grouped table is reused below.
carriers <- group_by(
  flights,
  carrier
)

# Approach 1: summarise each carrier group with n(), its row count.
s1 <- summarise(
  carriers,
  n()
)
s1

# Approach 2: count() on the ungrouped table.
s2 <- count(
  flights,
  carrier
)
s2

# Approaches 3 and 4: attach the group size to every row as `n`, then
# reduce the grouped table to its distinct rows, once on `n` alone and
# once on carrier together with `n`.
mu <- mutate(
  carriers,
  n = n()
)
s3 <- distinct(
  mu,
  n
)
s3
s4 <- distinct(
  mu,
  carrier, n
)
s4
# All four approaches give the same counts; distinct() removes duplicate rows.