-
group_list分組
第一步:清除之前所有變量+加載之前的數據
> rm(list = ls()) #表示清除所有變量;ls當前目錄賦值給列表,接著清除
#ls() 返回global environment 里面的所有object的名字。
#是一個character vector
> load(file = "step1output.Rdata")#加載工作目錄下之前保存的數據
> library(stringr)#加載stringr包
第二步,確認分組的目標
#前文提到的pd中有臨床信息,其中title中顯示了control組和實驗組
> pd$title
[1] "A375 cells 24h Control rep1" "A375 cells 24h Control rep2"
[3] "A375 cells 24h Control rep3" "A375 cells 24h Vemurafenib rep1"
[5] "A375 cells 24h Vemurafenib rep2" "A375 cells 24h Vemurafenib rep3"
pd
第三步,分組向量生成
> group_list=c(rep("control",times=3),rep("treat",times=3))
> group_list
[1] "control" "control" "control" "treat" "treat" "treat"
> #第三類,ifelse
> library(stringr)#這個包可以用函數str_detect()
> group_list=ifelse(str_detect(pd$title,"Control"),"control","treat")
> group_list
[1] "control" "control" "control" "treat" "treat" "treat"
#第一個為判斷條件,第二為true,第三false
#設置參考水平,對照在前,處理在后
#str_detect(string字符串, pattern匹配字符),返回邏輯值,是檢測函數;
#用于檢測字符串中是否存在某種匹配模式;
#val <- c("abca4", 123, "cba2");str_detect(val, "a")檢查val是否有字符串a;TRUE FALSE TRUE
#pd$title中有6個,返回6個,TRUE返回第一個control;FALSE返回為treat
第四步,設置因子
> group_list = factor(group_list,#生成因子的意義,后面的差異分析是處理/對照
levels = c("control","treat"))
#levels規定誰在前面誰是對照,注意順序,所有加用level
#芯片注釋,查找芯片平臺對應的包,到此腳本中替換
-
芯片注釋
芯片注釋,查找芯片平臺對應的包,到此腳本中替換
gpl #取網頁搜索GPL編號,ctrl+F,獲取相應的注釋包
http://www.bio-info-trainee.com/1399.html
芯片探針與基因的對應關系:http://www.bio-info-trainee.com/1399.html
image.png
第一步,安裝并加載hugene10sttranscriptcluster.db包
> gpl #取網頁搜索GPL編號,ctrl+F,獲取相應的注釋包
[1] "GPL6244"
>if(!require(hugene10sttranscriptcluster.db))BiocManager::install("hugene10sttranscriptcluster.db")
#require()表示加載,返回的是邏輯值,TRUE時表示已加載,FALSE表示未加載;!表示否定
#先安裝;ls("package:tidyr")函數用法
> library(hugene10sttranscriptcluster.db)
> ls("package:hugene10sttranscriptcluster.db")#顯示包里的所有目錄
[1] "hugene10sttranscriptcluster"
[2] "hugene10sttranscriptcluster.db"
[3] "hugene10sttranscriptcluster_dbconn"
[4] "hugene10sttranscriptcluster_dbfile"
[5] "hugene10sttranscriptcluster_dbInfo"
[6] "hugene10sttranscriptcluster_dbschema"
[7] "hugene10sttranscriptclusterACCNUM"
[8] "hugene10sttranscriptclusterALIAS2PROBE"
[9] "hugene10sttranscriptclusterCHR"
[10] "hugene10sttranscriptclusterCHRLENGTHS"
[11] "hugene10sttranscriptclusterCHRLOC"
[12] "hugene10sttranscriptclusterCHRLOCEND"
[13] "hugene10sttranscriptclusterENSEMBL"
[14] "hugene10sttranscriptclusterENSEMBL2PROBE"
[15] "hugene10sttranscriptclusterENTREZID"
[16] "hugene10sttranscriptclusterENZYME"
[17] "hugene10sttranscriptclusterENZYME2PROBE"
[18] "hugene10sttranscriptclusterGENENAME"
[19] "hugene10sttranscriptclusterGO"
[20] "hugene10sttranscriptclusterGO2ALLPROBES"
[21] "hugene10sttranscriptclusterGO2PROBE"
[22] "hugene10sttranscriptclusterMAP"
[23] "hugene10sttranscriptclusterMAPCOUNTS"
[24] "hugene10sttranscriptclusterOMIM"
[25] "hugene10sttranscriptclusterORGANISM"
[26] "hugene10sttranscriptclusterORGPKG"
[27] "hugene10sttranscriptclusterPATH"
[28] "hugene10sttranscriptclusterPATH2PROBE"
[29] "hugene10sttranscriptclusterPFAM"
[30] "hugene10sttranscriptclusterPMID"
[31] "hugene10sttranscriptclusterPMID2PROBE"
[32] "hugene10sttranscriptclusterPROSITE"
[33] "hugene10sttranscriptclusterREFSEQ"
[34] "hugene10sttranscriptclusterSYMBOL" ###重要
[35] "hugene10sttranscriptclusterUNIGENE"
[36] "hugene10sttranscriptclusterUNIPROT"
#View(hugene10sttranscriptclusterSYMBOL)
#str(hugene10sttranscriptclusterSYMBOL)
#View(hugene10sttranscriptclusterSYMBOL)
第二步,將hugene10sttranscriptclusterSYMBOL中的數據用數據框封裝
> ids <- toTable(hugene10sttranscriptclusterSYMBOL)#把包里的數據變成數據框
#toTable是一種能夠以數據框的形式來操作一個Bimap對象的方法,
#也就是把Bimap對象轉換為一個數據框,
#這些方法是Bimap interface方法的一部分。
#Bimap指的是一種映射關系,例如探針的編號與基因名稱之間的映射
head(ids)#只有兩列數據probe_id和symbol
probe_id symbol
1 7896759 LINC01128
2 7896761 SAMD11
3 7896779 KLHL17
4 7896798 PLEKHN1
5 7896817 ISG15
6 7896822 AGRN
#View(ids)
save(exp,group_list,ids,file = "step2output.Rdata")
繼續(xù)了解probe_id和symbol在該分析中的作用