0、Rstudio界面介紹及快捷鍵
運行當前/選中行 ctrl+enter
中止運(yùn)行 esc
插入 <- Alt+-
插入 %>% Ctrl+Shift+M
快捷注釋(支持多行選中)ctrl+shift+c;快捷注釋後,如需取消注釋,再按一次 ctrl+shift+c
Rstudio自動補全 tab
x <- 5
1、生成數據
set.seed(0)
set.seed(1)
c()
seq() #生成等差數據
rep() #重復生成數據
rep(1:10,2)
factor() #因子數據
語法 factor(x = character(), levels, labels = levels,…)
補充cut
x <- c("Man", "Male", "Man", "Lady", "Female")
Map from 4 different values to only two levels:
xf <- factor(x, levels = c("Male", "Man" , "Lady", "Female"),
labels = c("Male", "Male", "Female", "Female"))
> [1] Male Male Male Female Female
> Levels: Male Female
x <- c("Man", "Male", "Man", "Lady", "Female")
Map from 4 different values to only two levels:
xf <- factor(x, levels = c("Male", "Man" , "Lady", "Female"),
labels = c("1", "1", "2", "2"))
[1] 1 1 1 2 2
Levels: 1 2
######## cut分段
x <- round(rnorm(100)*100,digits = 2)
x_cut <- cut(x,breaks = seq(-100,100,length.out = 11))
x_group_count <- table(cut(x,breaks = seq(-100,100,length.out = 11)))
paste() #連接字符 paste0 無空格連接
paste("A","B","C")
paste0("A","B","C")
> paste("A","B")
[1] "A B"
> paste("A","B","C")
[1] "A B C"
> paste0("A","B","C")
[1] "ABC"
sample() # 抽樣
2、數據類型
vector # 向量(數值型、字符型、邏輯型) #單一向量中必須擁有同一類型
matrix矩陣
matrix(1:12,nrow = 3) #等效matrix(1:12,ncol = 4)
matrix(1:12,ncol = 4)
matrix(1:12,nrow = 3,byrow = TRUE)
data.frame # 數據框
tibble #一種data.frame
x <- matrix(1:12,nrow = 3,byrow = TRUE)
df <- data.frame(x)
as.matrix(df) # 轉換為matrix數據
is.data.frame(x) # 判斷是否為data.frame類型
array # 數組
list列表
list(x,df)
3、讀存數據(read、save)
3.1設置工作目錄【很重要】
setwd("E:/") #設置當前工作目錄為"E:/"
getwd() #讀取當前工作空間的工作目錄(文件讀取保存路徑)
read.table() #讀取帶分隔符的文本/數據文件
read.csv() #讀取.csv格式的數據,read.table的一種特定應用
df <- read.csv("da.csv",header = T, stringsAsFactors= T)
str(df)
excel數據文件讀取 .XLS .xlsx
install.packages("readxl")
library(readxl)
df <- read_excel("da.xlsx",sheet=1)
保存為.Rdata
write.table()
write.csv(df,"dfx.csv") # .csv格式導出
4、數據操作 查詢、引用、增刪(合并)、排序、dplyr tidyr
###########數據概況 str() summary() class()
x <- matrix(1:12,nrow = 3,byrow = TRUE)
df <- data.frame(x)
str(df)
summary(df)
> str(df)
'data.frame': 3 obs. of 4 variables:
$ X1: int 1 5 9
$ X2: int 2 6 10
$ X3: int 3 7 11
$ X4: int 4 8 12
> summary(df)
X1 X2 X3 X4
Min. :1 Min. : 2 Min. : 3 Min. : 4
1st Qu.:3 1st Qu.: 4 1st Qu.: 5 1st Qu.: 6
Median :5 Median : 6 Median : 7 Median : 8
Mean :5 Mean : 6 Mean : 7 Mean : 8
3rd Qu.:7 3rd Qu.: 8 3rd Qu.: 9 3rd Qu.:10
Max. :9 Max. :10 Max. :11 Max. :12
############ 引用 ############
x[1,4] # 值引用 x[行索引,列索引]
行/列引用 x[行索引,] 或x[,列索引]
x[1,] # 引用第一行
x[,4] # 引用第四列
x[2:3,2:3] # 行列混合引用(矩陣) x[行初始索引:行終止索引,列初始索引:列終止索引]
> x[1,4] # 值引用 x[行索引,列索引]
[1] 4
> # 行/列引用 x[行索引,] 或x[,列索引]
> x[1,] # 引用第一行
[1] 1 2 3 4
> x[,4] # 引用第四列
[1] 4 8 12
> x[2:3,2:3] # 行列混合引用(矩陣) x[行初始索引:行終止索引,列初始索引:列終止索引]
[,1] [,2]
[1,] 6 7
[2,] 10 11
names(df)[5] <- "testNAME" # 列重命名(二維數據框,變量)names();注意:此處 df 只有 4 列,索引 5 超出範圍會報錯,實際使用時應改為 1–4 之間的列索引
############ (多)數據(關聯)合并 ############
cbind(x,matrix(1:nrow(x),ncol = 1)) # 將x 與 matrix(1:nrow(x),ncol = 1) 按列合并
S3 method for class 'data.frame'
merge(x, y, by = intersect(names(x), names(y)),
by.x = by, by.y = by, all = FALSE, all.x = all, all.y = all,
sort = TRUE, suffixes = c(".x",".y"), no.dups = TRUE,
incomparables = NULL, ...)
rbind(x,rep("A",ncol(x))) # 向數據集中增加行
join #補充學習多數據關聯匹配-join相關 https://mp.weixin.qq.com/s/EAJe0EXq2JWlTWEgREV5vw
############ 單元格、行列值(計算)修改(邏輯修改) ############
x <- matrix(1:12,nrow = 3,byrow = TRUE)
x
x[1,1] <- 100 #修改指定單元格
x[,2] <- 0 # 修改指定列
transform(airquality, Ozone = -Ozone) # 對原始列進行計算
transform(airquality, new = -Ozone, Temp = (Temp-32)/1.8) # 對原始列進行計算
> x <- matrix(1:12,nrow = 3,byrow = TRUE)
> x
[,1] [,2] [,3] [,4]
[1,] 1 2 3 4
[2,] 5 6 7 8
[3,] 9 10 11 12
> x[1,1] <- 100 #修改數據【修改指定單元格,修改指定列,with 關聯修改】
> x
[,1] [,2] [,3] [,4]
[1,] 100 2 3 4
[2,] 5 6 7 8
[3,] 9 10 11 12
> x[,2] <- 0
> x
[,1] [,2] [,3] [,4]
[1,] 100 0 3 4
[2,] 5 0 7 8
[3,] 9 0 11 12
############ 排序&去重 補充學習https://mp.weixin.qq.com/s/0D9TyYqETCuIAWI0f_LvIQ
排序
sort # 單列排序返回值
order # 單列排序返回索引
rank # 單列排序返回“秩”
arrange # 多列排序
reorder # 用在繪圖中
去重
unique # 單向量/多列完全重復去重
duplicated # 函數
############ 數據篩選(邏輯) 條件篩選、&、| 補充學習 《補充-R 語言 邏輯運算:TRUE_FALSE _ 專題3.pdf》
關于邏輯 可適當補充apply系列函數
& 和 且
TRUE & TRUE
TRUE & FALSE
| 或
TRUE | TRUE
TRUE | FALSE
xor異或:當對應元素不等時返回TRUE
xor(T,F) #返回TRUE
xor(T,T)
xor(F,F)
所有為真T,返回T
all(T,T)
all(T,F)
任意為真T,返回T
any(T,F)
any(F,F)
> # & 和 且
> TRUE & TRUE
[1] TRUE
> TRUE & FALSE
[1] FALSE
> # | 或
> TRUE | TRUE
[1] TRUE
> TRUE | FALSE
[1] TRUE
> #xor異或:當對應元素不等時返回TRUE
> xor(T,F) #返回TRUE
[1] TRUE
> xor(T,T)
[1] FALSE
> xor(F,F)
[1] FALSE
> # 所有為真T,返回T
> all(T,T)
[1] TRUE
> all(T,F)
[1] FALSE
> # 任意為真T,返回T
> any(T,F)
[1] TRUE
> any(F,F)
[1] FALSE
options(digits=3) # 設置有效數字
NA # 缺失值
dplyr包 的下述六個函數用法
filter # 篩選:
arrange # 排列:
select # 選擇:
mutate # 變形:
summarise # 匯總:
group_by #分組:
示例 分組匯總計(jì)算
data("iris")
str(iris)
library(dplyr)
iris %>%
group_by(Species) %>%
summarise(mean_Petal.Length = mean(Petal.Length),
yangbenshu = n(),
max_Petal.Length = max(Petal.Length))
> library(dplyr)
> iris %>%
+ group_by(Species) %>%
+ summarise(mean_Petal.Length = mean(Petal.Length),
+ yangbenshu = n(),
+ max_Petal.Length = max(Petal.Length))
# A tibble: 3 x 4
Species mean_Petal.Length yangbenshu max_Petal.Length
<fct> <dbl> <int> <dbl>
1 setosa 1.46 50 1.9
2 versicolor 4.26 50 5.1
3 virginica 5.55 50 6.9
tidyr包 的下述四個函數用法
gather # 寬數據轉為長數據:(excel透視表反向操作)
spread # 長數據轉為寬數據:(excel透視表功能)
unite # 多列合并為一列:
separate # 將一列分離為多列