數據讀寫練習是生信技能樹生信爆款入門課程R語言部分Day5講到的一個重要知識點。
為加深理解,現在做下練習鞏固。
Last compiled on 01/10/21
rm(list = ls())
1.讀取ex1.txt
a <- read.table('ex1.txt')
dim(a)
## [1] 75 6
head(a)
## V1
## 1 title
## 2 Illumina Sentrix Array Matrix (SAM) - GoldenGate Methylation Cancer Panel I
## 3 Illumina HumanMethylation27 BeadChip (HumanMethylation27_270596_v.1.2)
## 4 Illumina HumanMethylation450 BeadChip (HumanMethylation450_15017482)
## 5 GE Healthcare/Amersham Biosciences CodeLink鈩\xa2 ADME Rat 16-Assay Bioarray
## 6 [AG] Affymetrix Arabidopsis Genome Array
## V2 V3 V4 V5
## 1 gpl bioc_package manufacturer organism
## 2 GPL15380 GGHumanMethCancerPanelv1 Illumina Homo sapiens
## 3 GPL8490 IlluminaHumanMethylation27k Illumina, Inc. Homo sapiens
## 4 GPL13534 IlluminaHumanMethylation450k Illumina, Inc. Homo sapiens
## 5 GPL2898 adme16cod GE Healthcare Rattus norvegicus
## 6 GPL71 ag Affymetrix Arabidopsis thaliana
## V6
## 1 data_row_count
## 2 1536
## 3 27578
## 4 485577
## 5 1280
## 6 8297
2.讀取ex2_B cell receptor signaling pathway.csv
ex2 <- read.csv('ex2_B cell receptor signaling pathway.csv',
row.names = 1)
dim(ex2)
## [1] 18 168
ex2[1:4,1:4]
## TCGA.06.0238.01A TCGA.06.0171.02A TCGA.28.5218.01A TCGA.06.0130.01A
## NCKAP1L 10.96088 13.67818 11.69558 12.41409
## SYK 10.64797 12.99044 11.07856 11.88787
## PTPRC 10.61789 13.49278 11.26111 12.33504
## PTPN6 10.49375 12.35558 10.58999 11.66260
3.讀取GSE32575_series_matrix.txt,賦值給gse。
gse <- read.table('GSE32575_series_matrix.txt',
comment.char = '!',
header = T,
row.names = 1)
dim(gse)
## [1] 336 48
gse[1:4,1:4]
## GSM807339 GSM807340 GSM807341 GSM807342
## ILMN_1343289 19525.4400 20503.6100 18821.2200 17943.6300
## ILMN_1343290 20599.1000 21696.7000 16206.9200 18101.9800
## ILMN_1343291 25829.9200 24742.1800 23758.1200 24592.3600
## ILMN_1343292 383.6296 353.3019 303.2715 375.0452
4.描述gse的屬性
#View(gse)
as.matrix(gse)[1:4,1:4]
## GSM807339 GSM807340 GSM807341 GSM807342
## ILMN_1343289 19525.4400 20503.6100 18821.2200 17943.6300
## ILMN_1343290 20599.1000 21696.7000 16206.9200 18101.9800
## ILMN_1343291 25829.9200 24742.1800 23758.1200 24592.3600
## ILMN_1343292 383.6296 353.3019 303.2715 375.0452
class(gse)
## [1] "data.frame"
5.將gse導出為新的txt和csv文件。
write.table(gse,'z.txt')
write.csv(gse,'zz.csv')
6.將gse保存為Rdata并加載。
save(gse,file = 'ex.Rdata')#file = 必須寫
rm(list = ls())
load('ex.Rdata')
練習4-1:
1.讀取complete_set.txt(已保存在工作目錄)
a <- read.table('complete_set.txt')
#先 dim
dim(a)
## [1] 51 20
a[1:4,1:4]
## V1 V2 V3 V4
## 1 geneA geneB geneC geneD
## 2 -0.635020187971398 -0.49728008811353 0.514896730700242 -1.01508182502931
## 3 0.91605661780324 -0.545381308500589 1.20238322656491 0.956212067289626
## 4 0.805995294157758 -0.315914513323816 0.27825197143441 -0.727119736260533
# 讀入之后 要先查看數據 是必經步驟
# 需要header= T
# 否則列名被改變了,可以看出列名多了V1
a <- read.table('complete_set.txt',header = T)
dim(a)
## [1] 50 20
a[1:4,1:4]
## geneA geneB geneC geneD
## 1 -0.6350202 -0.49728009 0.5148967 -1.0150818
## 2 0.9160566 -0.54538131 1.2023832 0.9562121
## 3 0.8059953 -0.31591451 0.2782520 -0.7271197
## 4 0.5380081 -0.06739211 -0.6237648 -1.6250202
#正確
#先 dim
2.查看有多少行、多少列
dim(a)
## [1] 50 20
3.獲取行名和列名
rownames(a)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15"
## [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
## [31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45"
## [46] "46" "47" "48" "49" "50"
colnames(a)
## [1] "geneA" "geneB" "geneC" "geneD" "geneE" "geneF" "geneG" "geneH" "geneI"
## [10] "geneJ" "geneK" "geneL" "geneM" "geneN" "geneO" "geneP" "geneQ" "geneR"
## [19] "geneS" "geneT"
4.導(dǎo)出為csv格式
write.csv(a,'a.csv')
5.保存為Rdata
save(a,file = 'a.Rdata')
6.加載class.Rdata,查看數據類型
load('class.Rdata')
#環境變量出現了y的數據
#在環(huán)境變量里面 可以看出都是字符型chr
class(y[2])#從一行一列也可以看出數據類型
## [1] "character"
y#從矩陣也可以看出
## gene1 gene2 gene3 gene4 gene5 gene6 grouplist
## GSM1 "40" "15" "22" "600" "25" "123" "control"
## GSM2 "20" "45" "77" "544" "33" "124" "control"
## GSM3 "51" "12" "26" "350" "30" "55" "control"
## GSM4 "46" "11" "20" "390" "45" "334" "treat"
## GSM5 "38" "12" "24" "260" "20" "543" "treat"
## GSM6 "49" "10" "25" "220" "33" "239" "treat"
class(y)#
## [1] "matrix" "array"
#字符和數字的數據框 轉為矩陣 就會這樣
高階數據讀取指南:http://www.reibang.com/p/4ea320c0dcc6