1 軟件安裝
http://www.reibang.com/p/eb89ab4af035
linux平臺下需要安裝的軟件:fastqc,fastp,hisat2,samtools,htseq
2 獲取基因組序列和基因組注釋文件
大鼠基因組和注釋文件:
Rattus_norvegicus - Ensembl genome browser 105
# Download the rat reference genome (FASTA) and its gene annotation (GTF)
# from Ensembl. `wget -c` resumes a partially downloaded file instead of
# starting over.
for url in \
  https://ftp.ensembl.org/pub/release-112/fasta/rattus_norvegicus/dna/Rattus_norvegicus.mRatBN7.2.dna.toplevel.fa.gz \
  https://ftp.ensembl.org/pub/release-112/gtf/rattus_norvegicus/Rattus_norvegicus.mRatBN7.2.112.gtf.gz
do
  wget -c "$url"
done
3 構建索引文件
# Build the HISAT2 index (output basename "Rat") from the genome FASTA using
# 2 threads.
# The genome is distributed gzip-compressed (*.fa.gz), while hisat2-build is
# given the plain *.fa here, so decompress it first when the plain file is
# not already present. `-k` keeps the original .gz next to the extracted file.
genome=Rattus_norvegicus.mRatBN7.2.dna.toplevel.fa
{ [ -f "$genome" ] || gunzip -k "$genome.gz"; } &&
  hisat2-build -p 2 "$genome" Rat
4 過濾raw reads
# Filter the raw reads of every paired-end sample in the current directory
# with fastp.
#
# Input:   <sample>_1.fastq.gz and <sample>_2.fastq.gz
# Output:  <sample>_1.clean.fq.gz and <sample>_2.clean.fq.gz (current dir),
#          plus per-sample reports ./fastp/<sample>.json and
#          ./fastp/<sample>.html
#
# The samples are found with a shell glob rather than by parsing `ls`, and
# every expansion is quoted, so sample names containing spaces or glob
# characters are passed to fastp intact. If no *_1.fastq.gz file exists the
# loop does nothing.
#
# NOTE(review): the fastp flags are kept exactly as written ("-5 20").
# Presumably the 20 is meant as the mean-quality threshold for front
# trimming; confirm against `fastp --help` for the installed version that it
# is interpreted that way.
run_fastp() {
  local r1 sample
  mkdir -p fastp
  for r1 in *_1.fastq.gz; do
    # Without nullglob an unmatched pattern stays literal; skip it.
    [ -e "$r1" ] || continue
    sample=${r1%_1.fastq.gz}
    fastp -5 20 -i "${sample}_1.fastq.gz" -I "${sample}_2.fastq.gz" \
      -o "${sample}_1.clean.fq.gz" -O "${sample}_2.clean.fq.gz" \
      -j "./fastp/${sample}.json" -h "./fastp/${sample}.html"
  done
}
run_fastp
5 比對
# Align the cleaned paired-end reads of every sample in the current directory
# with HISAT2 and write a sorted BAM per sample.
#
# Input:   <sample>_1.clean.fq.gz and <sample>_2.clean.fq.gz
# Output:  <sample>_ht2p.bam     (sorted alignments)
#          <sample>.hisat2.log   (hisat2's stderr for that sample)
#
# The index basename defaults to the original local path and can be
# overridden by setting HISAT2_INDEX.
#
# The log file is named after the full sample name, the same prefix used for
# the BAM. Cutting at the first underscore instead would make samples such as
# A_rep1 and A_rep2 both write A.hisat2.log and overwrite each other.
run_hisat2() {
  local index=${HISAT2_INDEX:-/media/lzx/0000678400004823/Indexs/Hisat2/Rat/Rat}
  local r1 sample
  for r1 in *_1.clean.fq.gz; do
    # Without nullglob an unmatched pattern stays literal; skip it.
    [ -e "$r1" ] || continue
    sample=${r1%_1.clean.fq.gz}
    # hisat2 writes SAM to stdout; the trailing "-" makes samtools sort read
    # it from stdin explicitly.
    hisat2 -t -p 2 -x "$index" \
      -1 "$r1" -2 "${sample}_2.clean.fq.gz" \
      2> "${sample}.hisat2.log" \
      | samtools sort -@ 2 -o "${sample}_ht2p.bam" -
  done
}
run_hisat2