# Switch the working directory to E:/R and load leadership.xlsx into the
# `leadership` data frame used by the rest of these notes.
# NOTE(review): no library() call is visible in this part of the script, so
# read.xlsx() presumably comes from a package attached elsewhere (e.g. xlsx or
# openxlsx), with the second argument selecting the first sheet -- confirm.
setwd("E:/R")
leadership<-read.xlsx("leadership.xlsx",1)
# 4.2 Creating new variables ----
# Three ways to add a sum column (sumx) and a mean column (meanx) to a
# data frame.

# (1) Reference every column explicitly with `$`.
mydata <- data.frame(x1 = c(2, 2, 6, 4), x2 = c(3, 4, 2, 8))
mydata$sumx <- mydata$x1 + mydata$x2
mydata

# (2) attach() the data frame so its columns can be named directly.
# The new columns still have to be assigned through `mydata$`, because
# attach() only exposes a copy of the columns on the search path.
attach(mydata)
mydata$sumx <- x1 + x2
mydata$meanx <- (x1 + x2) / 2
detach(mydata)

# (3) (preferred) transform() builds all new columns in a single call.
mydata <- transform(mydata, sumx = x1 + x2, meanx = (x1 + x2) / 2)
leadership

# 4.3 Recoding variables ----

# (1) Recode the value 99 in `afe` to NA, then bin `afe` into a character
# column with logical indexing. Rows where `afe` is NA stay NA in the new
# column.
# NOTE(review): this method writes `gfecat` ("Elder") while method (2) below
# writes `afecat` ("elder") -- confirm whether two columns are intended.
leadership$afe[leadership$afe == 99] <- NA
leadership$gfecat[leadership$afe > 75] <- "Elder"
leadership$gfecat[leadership$afe >= 55 & leadership$afe <= 75] <- "middle"
leadership$gfecat[leadership$afe < 55] <- "young"
leadership

# (2) (recommended) The same binning inside within(), which lets the columns
# be referenced by bare name and returns the modified data frame.
leadership <- within(leadership, {
  afecat <- NA
  afecat[afe > 75] <- "elder"
  # Must be an assignment (`<-`); a bare `<` only compares and assigns nothing.
  afecat[afe >= 55 & afe <= 75] <- "middle"
  afecat[afe < 55] <- "young"
})
# 4.4 Renaming variables ----

# (1) Open the interactive data editor and rename the columns by hand.
# fix() needs a user at the console, so it is skipped in batch runs.
if (interactive()) {
  fix(leadership)
}

# (2) Rename by position through names().
names(leadership)
names(leadership)[2] <- "testdata"
names(leadership)[6:10] <- c("c1", "c2", "c3", "c4", "c5")

# (3) Rename by name with plyr::rename(); its second argument is a named
# character vector of the form c(old_name = "new_name").
# Install plyr only when it is missing instead of on every run.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library("plyr")
leadership <- rename(
  leadership,
  c(manager = "manageID", testdata = "testdata1")
)
# 4.5 Missing values ----
y <- c(1, 2, 3, NA)
is.na(y)

# Detect missing values: is.na() returns a logical object of the same shape,
# here for columns 6 to 10 of the data frame.
is.na(leadership[, 6:10])

# Notes:
# (1) Missing values are considered non-comparable, even with themselves
#     (NA == NA is NA), so always test with is.na().
# (2) R does not mark infinite or impossible values as missing values.
# 4.5.1 Recoding some values to missing ----
# Reload the workbook so the recode starts from the data as stored on disk.
setwd("E:/R")
leadership <- read.xlsx("leadership.xlsx", 1)

# Build the mask from the same column that is being overwritten. If the mask
# column does not exist, the mask is empty and nothing is recoded.
# NOTE(review): the mask previously read leadership$age, while every other
# statement in this script uses `afe` -- confirm the real column name in the
# workbook.
leadership$afe[leadership$afe == 99] <- NA
# 4.5.2 Excluding missing values from analyses ----
x <- c(1, 2, NA, 3)

# Arithmetic that touches an NA yields NA, both element by element ...
y <- x[[1]] + x[[2]] + x[[3]] + x[[4]]
# ... and through sum() with its default handling of missing values.
z <- sum(x, na.rm = FALSE)

# Drop the missing values first and compute on the values that remain.
y <- sum(x, na.rm = TRUE)
# Use na.omit() to delete incomplete observations: every row of `leadership`
# that contains at least one NA is removed from the result.
newdata <- stats::na.omit(leadership)
newdata
# 4.6 Date values ----
Sys.Date()  # current date as a Date object
date()      # current date and time as a character string

today <- Sys.Date()
format(today, format = "%B %d %Y")

# Subtracting two Date objects gives a difftime measured in days.
startdate <- as.Date("2004-02-13")
enddate <- as.Date("2011-01-22")
days <- enddate - startdate
days

# difftime() does the same subtraction with an explicit unit.
dob <- as.Date("1994-05-29")
difftime(today, dob, units = "days")

# 4.6.1 Converting dates to character variables ----
# `dates` has to exist before it can be converted; reuse the two dates above.
dates <- c(startdate, enddate)
strDates <- as.character(dates)
# 4.7 Type conversions ----
# 4.8 Sorting data ----

# Sort the rows by `afe` in ascending order.
newdate <- leadership[order(leadership$afe), ]

# Sort by `mate` first and by `afe` within each `mate` value. with() resolves
# the column names without attach()/detach(), so nothing is left on the search
# path if an error occurs in between.
newdate <- leadership[with(leadership, order(mate, afe)), ]
newdate
# 4.9 Merging datasets ----
# NOTE(review): dataframeA, dataframeB, A and B are not defined in the visible
# part of this script; they appear to be placeholders for the reader's own
# objects -- confirm they exist before running this section.

# 4.9.1 Adding columns to a data frame ----
# Join two data frames on the shared key column "ID".
total <- merge(dataframeA, dataframeB, by = "ID")

# Horizontal concatenation with cbind() (no key; objects are placed side by
# side).
total <- cbind(A, B)

# 4.9.2 Adding rows to a data frame ----
# Vertical concatenation: append the rows of one data frame to the other.
total <- rbind(dataframeA, dataframeB)
# Both data frames must have the same variables. If they do not, either
# (1) delete the extra variables in dataframeA, or
# (2) create the additional variables in dataframeB and set them to NA
#     (missing).
# rbind() is commonly used to add observations.
# 4.10 Subsetting datasets ----
# 4.10.1 Selecting (keeping) variables ----

# By column position.
newdata <- leadership[, 6:10]

# By a character vector of column names.
myvars <- c("q1", "q2", "q3", "q4", "q5")
newdata <- leadership[myvars]

# By generated names; paste0() builds "q1" ... "q4".
myvars <- paste0("q", 1:4)
newdata <- leadership[myvars]
newdata
# 4.10.2 Excluding (dropping) variables ----

# Build a logical mask that is TRUE for the columns to drop, then keep the
# complement of that mask.
myvars <- names(leadership) %in% c("q3", "q4")
newdata <- leadership[!myvars]

# Alternative: delete the columns from `leadership` itself by assigning NULL.
leadership$q3 <- leadership$q4 <- NULL
# 4.10.3 Selecting observations ----

# The first three rows.
newdata <- leadership[1:3, ]
leadership

# Rows where `mate` is "m" and `afe` is above 30. Columns are referenced
# through the data frame rather than attach(), so nothing is left on the
# search path. which() keeps only the TRUE positions, so rows whose condition
# is NA are dropped instead of coming back as all-NA rows.
newdata <- leadership[which(leadership$mate == "m" & leadership$afe > 30), ]
newdata

# subset() filters rows and picks columns in a single call.
newdata <- subset(leadership, afe >= 35 | afe < 24, select = c(q1, q2))
newdata