RNA-seq流程-從SRR下載到得到表達(dá)矩陣
1.數(shù)據(jù)下載
在~/project/new/路徑下蜡峰,將SRR號(hào)重定向到一個(gè)id里
image-20190102174323983
#(文件夾都是建在~/project/new/路徑下)
mkdir sra
cd sra
cat /four/mm/project/new/id |while read id;do (prefetch ${id});done
image-20190102165414678
將SRA數(shù)據(jù)轉(zhuǎn)成fastq
#在~/project/new/路徑下
mkdir fq
cd fq
ls /four/mm/project/new/sra/*sra | while read id; do (fastq-dump --gzip --split-3 -O ./ ../sra/${id}); done
另外:cat ./id |while read id;do (fastq-dump --gzip --split-3 -O ./ ${id}); done
image-20190102171400309
2.質(zhì)控
#在~/project/new/路徑下
mkdir fq.qc
cd fq.qc
###1.data statistics
ls ../fq/*.fastq.gz |while read id ;do (fastqc -t 2 -o ./ ${id});done
multiqc ./ #整合報(bào)告結(jié)果
image-20190102174941909
image-20190102173950514
###2.filter data
#雙端測(cè)序
ls ~/fqmm/*_1.fastq.gz >1
ls ~/fqmm/*_2.fastq.gz >2
paste 1 2 > config
cat >qc.sh#下面是要輸入的內(nèi)容
source activate rna
bin_trim_galore=trim_galore
dir='/four/mm/project/new/clean'
cat $1 |while read id
do
arr=($id)
fq1=${arr[0]}
fq2=${arr[1]}
nohup $bin_trim_galore -q 25 --phred33 --length 36 -e 0.1 --stringency 3 --paired -o $dir $fq1 $fq2 &
done
source deactivate
#單端測(cè)序(當(dāng)前路徑是~/project/new)(注:此時(shí)od不僅僅是SRR號(hào),還包括了.fastq.gz,所以輸入文件的循環(huán)是${id},而不是${id}.fastq.gz)
mkdir clean
cat ./fq/od |while read id ;do (trim_galore --phred33 -q 25 -e 0.1 --length 36 --stringency 3 -o ./clean/ ./fq/${id}); done
image-20190102162723492
image-20190102115830065
3.比對(duì)+bam排序
#雙端測(cè)序
nohup cat /four/mm/project/new/id |while read id;do #復(fù)制一份id到當(dāng)前路徑下
hisat2 -p 5 -x ~/index/grch38/genome #比對(duì)
-1 ${id}_1_val_1.fq.gz
-2 ${id}_2_val_2.fq.gz | #管道符,生成的文件進(jìn)行下一步
samtools sort -@ 5 -o ~/rna.GSE52778/sort.bam/${id}.sort.bam - #bam排序
done &
#單端測(cè)序 當(dāng)前路徑~/project/new/clean/
mkdir sort.bam
nohup cat /four/mm/project/new/id |while read id;do (hisat2 -p 5 -x /four/mm/index/hisat/hg38/genome -U ${id}_trimmed.fq.gz|samtools sort -@ 5 -o ../sort.bam/${id}.sort.bam -) done &
image-20190102161629716
4.計(jì)數(shù)
#雙/單端測(cè)序
nohup cat /four/mm/project/new/id |while read id;do featureCounts -T 5 -p -t exon -g gene_id -a /four/mm/project/gtf/gencode.v29.annotation.gtf
-o ./featureCounts/all.counts.txt ./sort.bam/${id}.sort.bam; done &
終于完整的能得到表達(dá)矩陣了景描,能顯示下面這個(gè)圖片达罗,真是好漂釀舟肉!
image-20190102163758350