# Chapter 5: Advanced data management
# Start from an empty score sheet (four typed columns, no rows) and fill it
# in by hand through the data editor.
y <- data.frame(
  name = character(),
  math = numeric(),
  science = numeric(),
  english = numeric()
)
mydata <- edit(y)  # edit() hands back the edited copy; y itself is untouched
mydata
# 5.2 Numerical and character functions
# 5.2.1 Mathematical functions
# 5.2.2 Statistical functions
# Standardize the data. scale() only works on numeric input, and mydata has a
# character `name` column, so just the numeric columns are passed on.
dada <- scale(mydata[vapply(mydata, is.numeric, logical(1))])
# 5.2.3 Probability functions
# set.seed() requires a seed value; calling it with no argument is an error.
set.seed(1234)  # fix the random seed so the draws below are reproducible
runif(5)        # five pseudo-random numbers from the uniform distribution
# Generate data from a multivariate normal distribution
library(MASS)
options(digits = 3)
set.seed(1234)
# The mean vector is called `mu`, not `mean`, so that base::mean() is not
# masked by a numeric vector in the global environment.
mu <- c(230.7, 146.7, 3.6)
# 3 x 3 covariance matrix (symmetric, so fill order does not matter).
sigma <- matrix(
  c(
    15360.8, 6721.2, -47.1,
    6721.2, 4700.9, -16.5,
    -47.1, -16.5, 0.3
  ),
  nrow = 3, ncol = 3
)
mydata <- mvrnorm(500, mu, sigma)
mydata <- as.data.frame(mydata)
names(mydata) <- c("y", "x1", "x2")
dim(mydata)
head(mydata, n = 10)
# 5.2.4 Character functions
# 5.2.5 Other useful functions
# 5.2.6 Applying functions to matrices and data frames
# The same function call works on a scalar, a vector and a matrix.
a <- 5
sqrt(a)
b <- c(1.243, 5.654, 2.99)
round(b)
# The matrix is called `m`, not `c`, so that base::c() is not masked.
m <- matrix(runif(12), nrow = 3)
m
log(m)   # element-wise
mean(m)  # one value over all elements
# Apply a function across every row or every column of a matrix
mydata <- matrix(rnorm(30), nrow = 6)
mydata
apply(mydata, MARGIN = 1, FUN = mean)              # mean of each row
apply(mydata, MARGIN = 2, FUN = mean)              # mean of each column
apply(mydata, MARGIN = 2, FUN = mean, trim = 0.2)  # trimmed mean of each column
# 5.3 A solution for the data management challenge
# First draft: load the roster from an Excel workbook.
options(digits = 2)
setwd("e:/r")
library(xlsx)
student <- read.xlsx("student.xlsx", 1)
student
# The argument is spelled `stringsAsFactors` and the constant is `FALSE`; the
# draft misspelled both. math/science/english do not exist as standalone
# vectors at this point in the script, so the roster is built from the sheet.
# NOTE(review): assumes columns 2:4 of the sheet hold the math, science and
# English scores -- confirm against student.xlsx.
roster <- data.frame(student, stringsAsFactors = FALSE)
z <- scale(roster[, 2:4])
# 5-6 (presumably the listing number of the self-contained version below)
# Combine three exam scores into a single performance indicator, grade the
# students on it, and sort the roster by last name, then first name.
options(digits = 2)
student <- c(
  "J D", "A W", "B M",
  "D J", "J M", "C C",
  "R Y", "G K", "J E",
  "M R"
)
math <- c(502, 600, 412, 358, 495, 512, 410, 625, 573, 522)
science <- c(95, 99, 80, 82, 75, 85, 80, 95, 89, 86)
english <- c(25, 22, 18, 15, 20, 28, 15, 30, 27, 18)
# `stringsAsFactors` must be spelled exactly: data.frame() turns any other
# named argument into a column, so the misspelling added a junk column.
roster <- data.frame(student, math, science, english, stringsAsFactors = FALSE)

# Standardize the three score columns and average them per student.
z <- scale(roster[, 2:4])
score <- apply(z, 1, mean)
roster <- cbind(roster, score)

# Cut points at the 80th, 60th, 40th and 20th percentiles of the score.
y <- quantile(score, c(.8, .6, .4, .2))
roster$grade[score >= y[1]] <- "A"
roster$grade[score < y[1] & score >= y[2]] <- "B"
roster$grade[score < y[2] & score >= y[3]] <- "C"
roster$grade[score < y[3] & score >= y[4]] <- "D"
# Strictly below the lowest cut point: `<=` would overwrite the "D" of a
# student whose score sits exactly on it.
roster$grade[score < y[4]] <- "F"
roster

class(roster$student)
# strsplit() only accepts character vectors.
roster$student <- as.character(roster$student)
name <- strsplit(roster$student, " ")
# vapply() pins the result to one string per student.
LN <- vapply(name, "[", character(1), 2)
FN <- vapply(name, "[", character(1), 1)
roster <- cbind(FN, LN, roster[, -1])
roster <- roster[order(LN, FN), ]
roster
# 5.4 Control flow
# 5.4.1 Repetition and looping
# Counted loop: ten iterations.
for (i in seq_len(10)) {
  print("H")
}
# Condition-driven loop: counts i down from 10 until it reaches 0.
i <- 10
while (i > 0) {
  print("h")
  i <- i - 1
}
# 5.4 Conditional execution
# 5.4.1 if-else
# Columns are read through roster$... instead of attach(roster): attaching the
# frame twice stacks duplicate copies on the search path, and any global of the
# same name silently takes precedence over the attached column.
if (is.character(roster$math)) {
  roster$math <- as.factor(roster$math)
}
# Convert LN itself; the draft stored the converted last names in `math`,
# which would have overwritten the math scores.
if (!is.character(roster$LN)) {
  roster$LN <- as.factor(roster$LN)
} else {
  print("gggg")
}
# 5.4.2 ifelse: two-way choice for when input and output are both vectors
# The print() calls are evaluated as the yes/no arguments, not once per element.
ifelse(roster$score > 0.5, print("P"), print("f"))
# One label per roster row, in the roster's current row order.
outcome <- ifelse(roster$score > 0.5, "P", "f")
# 5.4.3 switch
# Print the message that matches each entry of `feelings`.
feelings <- c("s", "a")
for (i in feelings) {
  print(
    switch(i,
      h = "iii",
      a = "tttt",
      s = "ccc"
      # A second `a = "ccc"` arm was dropped: switch() takes the first
      # matching name, so the duplicate could never be reached.
    )
  )
}
# 5.5 User-written functions
# mystats: summarise the centre and spread of a numeric vector.
#   x           numeric vector
#   parametric  TRUE  -> mean and standard deviation
#               FALSE -> median and median absolute deviation
#   print       TRUE  -> also write the two statistics to the console
# Returns a list with elements `center` and `spread`.
mystats <- function(x, parametric = TRUE, print = FALSE) {
  if (parametric) {
    center <- mean(x)
    spread <- sd(x)
  } else {
    center <- median(x)
    spread <- mad(x)
  }
  # Report only on request, with labels matching the statistics computed.
  # A bare `else` here would print the median/MAD line even when print = FALSE.
  if (print && parametric) {
    cat("mean=", center, "\n", "sd=", spread, "\n")
  } else if (print && !parametric) {
    cat("median=", center, "\n", "mad=", spread, "\n")
  }
  result <- list(center = center, spread = spread)
  return(result)
}
# Try mystats() on 500 seeded standard-normal draws
set.seed(1234)
x <- rnorm(n = 500)
y <- mystats(x, parametric = TRUE, print = FALSE)
# User-written function 2
# mydate: the current date formatted as text.
#   type  "long"  -> weekday, month name, day of month, four-digit year
#         "short" -> mm-dd-yy
#         any other value writes a notice to the console and yields NULL
mydate <- function(type = "long") {
  switch(type,
    long = format(Sys.time(), "%A %B %d %Y"),
    short = format(Sys.time(), "%m-%d-%y"),
    # Unnamed last argument: the fall-through used when no name matches.
    cat(type, "is not a recognized type\n")
  )
}
# Exercise each branch of mydate()
mydate(type = "long")
mydate(type = "short")
mydate()                # falls back to the default type
mydate(type = "dvsdf")  # a type that is neither "long" nor "short"
# 5.6 Aggregation and restructuring
# 5.6.1 Transpose
cars <- mtcars[1:5, 1:4]
cars
t(cars)
# 5.6.2 Aggregating data
options(digits = 3)
mtcars
# Mean of every column within each cylinder-count / gear-count combination.
# The grouping columns are referenced through mtcars$ so the data set does
# not have to be attach()ed to the search path.
aggdata <- aggregate(
  mtcars,
  by = list(mtcars$cyl, mtcars$gear),
  FUN = mean,
  na.rm = TRUE
)
aggdata
# 5.6.3 reshape2: melting and casting (melt, dcast)
library(reshape2)
# Each id variable is its own string; the single string "id,time" names one
# column called "id,time" rather than the two columns `id` and `time`.
# NOTE(review): these ids only apply if mydata is a data frame that has `id`
# and `time` columns -- confirm mydata is rebuilt that way before this point.
md <- melt(mydata, id = c("id", "time"))