一、文章數據下載
安裝miniconda
# Install wget, then download and run the Miniconda installer.
installer_url=https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
sudo apt-get install wget
wget "${installer_url}"
# wget saves the file under the last path component of the URL.
bash "${installer_url##*/}"
# Re-read ~/.bashrc in the current shell.
source ~/.bashrc
中科大源
# Register the USTC mirror channels (over HTTPS rather than plain HTTP).
# `conda config --add` puts each new channel at the front of the list, so the
# order below is kept exactly as before to leave the final priority unchanged.
ustc_mirror=https://mirrors.ustc.edu.cn/anaconda
for channel in pkgs/main pkgs/free cloud/conda-forge cloud/msys2 cloud/bioconda cloud/menpo; do
  conda config --add channels "${ustc_mirror}/${channel}/"
done
# Show which channel each package comes from when installing.
conda config --set show_channel_urls yes
清華源
# Register the Tsinghua (TUNA) mirror channels.
# The stray "?" characters that had been glued onto two URLs and in front of
# the last command are removed; they made the URLs invalid and turned the last
# line into an unknown command.
# `conda config --add` puts each new channel at the front of the list, so the
# original order is preserved.
tuna_mirror=https://mirrors.tuna.tsinghua.edu.cn/anaconda
for channel in pkgs/free pkgs/main cloud/conda-forge cloud/bioconda; do
  conda config --add channels "${tuna_mirror}/${channel}/"
done
# Show which channel each package comes from when installing.
conda config --set show_channel_urls yes
安裝軟件
# Create a dedicated environment for the RNA-seq tools.
conda create -n rnaseq python=3
# Activate the environment BEFORE installing anything, so every package below
# is installed into rnaseq and not into the base environment.
conda activate rnaseq
# Download tools (prefetch / fastq-dump).
conda install sra-tools
# QC:        fastqc multiqc trimmomatic cutadapt trim-galore
# Alignment: star hisat2 bowtie2 tophat bwa subread
# Counting:  htseq bedtools deeptools salmon
# fastqc and multiqc are part of the QC list above, so they are installed
# together with the rest.
conda install fastqc multiqc trimmomatic cutadapt trim-galore star hisat2 bowtie2 tophat bwa subread htseq bedtools deeptools salmon
下載數據
下載SRR_Acc_List.txt文件,
# Download every run listed in SRR_Acc_List.txt (one accession per line),
# one after another.
while read -r id; do
  [[ -n "$id" ]] || continue # skip blank lines
  prefetch "$id"
done < SRR_Acc_List.txt
掛在后臺(tái)下載:
# Same download, but every prefetch is started in the background so the loop
# does not wait for one run to finish before starting the next.
# NOTE(review): the list used to be read from a file called "id"; it now reads
# SRR_Acc_List.txt like the sequential loop — change it back if a separate
# "id" file is really intended.
while read -r id; do
  [[ -n "$id" ]] || continue # skip blank lines
  (prefetch "$id" &)
done < SRR_Acc_List.txt
轉化為fastq文件
# Convert a single run (SRR10695769) to gzip-compressed FASTQ, written to
# /mnt/f/project/HN/.
fastq-dump --gzip --split-3 -O /mnt/f/project/HN/ SRR10695769
批量轉換sra到fq格式
# Convert every .sra file under /mnt/f/project/N/sra to gzip-compressed FASTQ
# in /mnt/f/project/N/fastq, one background job per file.
# Fixes: stray "?" characters removed from the paths, and the option is
# spelled --split-3 (it had been broken into "--split -3").
for sra in /mnt/f/project/N/sra/*.sra; do
  [[ -e "$sra" ]] || continue # glob matched nothing
  (nohup fastq-dump --gzip --split-3 -O /mnt/f/project/N/fastq "$sra" &)
done
二、質控
fastqc
# Run FastQC on every gzipped FASTQ under /mnt/f/project/HN/fastq, one
# background job per file, writing reports to /mnt/f/project/HN/fastqc.
# The report directory is created up front because fastqc expects the -o
# directory to exist already.
mkdir -p /mnt/f/project/HN/fastqc
for fq in /mnt/f/project/HN/fastq/*.gz; do
  [[ -e "$fq" ]] || continue # glob matched nothing
  (nohup fastqc -q -t 4 -o /mnt/f/project/HN/fastqc "$fq" &)
done
fastp
單端測序
# Minimal single-end usage: one input FASTQ, one filtered output.
fastp -i in.fq -o out.fq

# Make sure the output directories used by both batch variants exist.
mkdir -p ./fastp_data ./fastp_result

# Batch variant 1: process every *.fastq.gz in the current directory, one
# sample at a time.
for fq in *.fastq.gz; do
  [[ -e "$fq" ]] || continue # glob matched nothing
  i=${fq%.fastq.gz}
  fastp -w 4 -i "${i}.fastq.gz" -o "./fastp_data/${i}.fastq.gz" \
    -h "./fastp_result/${i}.html" -j "./fastp_result/${i}.json"
done

# Batch variant 2: same inputs, but each fastp is started in the background
# and the filtered reads are written as <sample>.fq.gz.
# The sample id is the file name minus ".fastq.gz"; cutting at "_" returned
# the whole file name for single-end files and produced a doubled extension.
# Reports go to ./fastp_result like variant 1 (they had pointed at
# /fastp_result in the filesystem root).
for fq in *.fastq.gz; do
  [[ -e "$fq" ]] || continue # glob matched nothing
  id=${fq%.fastq.gz}
  (nohup fastp -w 4 -i "${id}.fastq.gz" -o "./fastp_data/${id}.fq.gz" \
    -h "./fastp_result/${id}.html" -j "./fastp_result/${id}.json" &)
done
雙端測序
# Paired-end fastp: for every <sample>_1.fastq.gz in the current directory,
# filter it together with <sample>_2.fastq.gz in a background job.
# The sample id is the file name minus "_1.fastq.gz", so sample names that
# themselves contain "_" are kept whole.
mkdir -p ./fastp_data ./fastp_result
for r1 in *_1.fastq.gz; do
  [[ -e "$r1" ]] || continue # glob matched nothing
  id=${r1%_1.fastq.gz}
  (nohup fastp -w 4 \
    -i "${id}_1.fastq.gz" -I "${id}_2.fastq.gz" \
    -o "./fastp_data/${id}_1.fastq.gz" -O "./fastp_data/${id}_2.fastq.gz" \
    -h "./fastp_result/${id}.html" -j "./fastp_result/${id}.json" &)
done
#!/bin/bash
# Used for rnaseq data by Fastp
# Mind the sample naming scheme when adapting this script:
#   (1) SRR10695753_1.fastq.gz : list *_1.fastq.gz, id = name without
#       "_1.fastq.gz"; mates are ${id}_1.fastq.gz and ${id}_2.fastq.gz
#   (2) Sample_Ck_1.R1.fq.gz   : list *.R1.fq.gz, id = name without
#       ".R1.fq.gz"; mates are ${id}.R1.fq.gz and ${id}.R2.fq.gz
# Scheme (2) is the one used below.
cd /mnt/g/project/N_batch/data/rawdata || exit 1
fastp_out=/mnt/g/project/N_batch/data/cleandata/fastp
mkdir -p "${fastp_out}/fastp_data" "${fastp_out}/fastp_result"
for r1 in *.R1.fq.gz; do
  [[ -e "$r1" ]] || continue # glob matched nothing
  id=${r1%.R1.fq.gz}
  # One background fastp job per sample.
  (nohup fastp -w 4 \
    -i "${id}.R1.fq.gz" -I "${id}.R2.fq.gz" \
    -o "${fastp_out}/fastp_data/${id}_cleandata.R1.fq.gz" \
    -O "${fastp_out}/fastp_data/${id}_cleandata.R2.fq.gz" \
    -h "${fastp_out}/fastp_result/${id}.html" \
    -j "${fastp_out}/fastp_result/${id}.json" &)
done
#trim_galore
#!/bin/bash
# Used for rnaseq data by trim_galore
# Trims every paired-end sample found in the raw-data directory, one
# background trim_galore job per pair.
cd /mnt/g/project/N_batch/data/rawdata || exit 1
# Build "config": one line per sample holding mate 1 and mate 2 separated by a
# tab. Both listings are sorted the same way, so paste pairs them by position.
ls *_1.fastq.gz > 1
ls *_2.fastq.gz > 2
paste 1 2 > config
dir=/mnt/g/project/N_batch/data/cleandata/trim_galore
mkdir -p "$dir"
# read splits each config line on whitespace straight into the two mates
# (a plain string assignment never produced a second element, so mate 2 was
# always empty).
while read -r fq1 fq2; do
  nohup trim_galore -q 25 --phred33 --length 35 -e 0.1 --stringency 3 --paired -o "$dir" "$fq1" "$fq2" &
done < config
比對
#!/bin/bash
# Align single-end reads with HISAT2: every <sample>.fq.gz in the current
# directory is aligned, one at a time, to <sample>.sam.
# The sample id is the file name minus ".fq.gz", so the input path handed to
# hisat2 is always the file that was actually found.
for fq in *.fq.gz; do
  [[ -e "$fq" ]] || continue # glob matched nothing
  id=${fq%.fq.gz}
  nohup hisat2 -p 4 -x /home/user/../genome_hist2/hisat2_css_index -U "${id}.fq.gz" -S "${id}.sam"
done
雙端測序
#!/bin/bash
# Align paired-end reads with HISAT2: for every <sample>_1.fastq.gz in the
# current directory, align it together with <sample>_2.fastq.gz and write
# /home/user/../HN/sam/<sample>.sam. Samples are processed one at a time.
for r1 in *_1.fastq.gz; do
  [[ -e "$r1" ]] || continue # glob matched nothing
  id=${r1%_1.fastq.gz}
  # List both mates first as a quick check that the pair is complete.
  ls -lh "${id}_1.fastq.gz" "${id}_2.fastq.gz"
  # Separate command: without a separator the whole hisat2 call was being
  # passed to ls as extra arguments and never ran.
  nohup hisat2 -p 4 -x /home/user/../genome_hist2/hisat2_css_index \
    -1 "${id}_1.fastq.gz" -2 "${id}_2.fastq.gz" \
    -S "/home/user/../HN/sam/${id}.sam"
done
轉bam排序
#!/bin/bash
# Sort every SAM file in the current directory into <name>.bam.
for sam in *.sam; do
  [[ -e "$sam" ]] || continue # glob matched nothing
  samtools sort -O bam -@ 5 -o "${sam%.sam}.bam" "$sam"
done
# Index every BAM, then write its alignment summary to <name>.flagstat.
for bam in *.bam; do
  [[ -e "$bam" ]] || continue # glob matched nothing
  samtools index "$bam"
done
for bam in *.bam; do
  [[ -e "$bam" ]] || continue # glob matched nothing
  samtools flagstat -@ 10 "$bam" > "${bam%.bam}.flagstat"
done
計數
# featureCounts is run right after installing the subread package.
conda install subread -y
# Count reads over all BAM files and write the table to all_id.count.
# "****.gtf" is a placeholder: replace it with the path of the annotation GTF
# (left as-is, the shell treats it as a glob for any .gtf in this directory).
# NOTE(review): the BAM glob used to be /*.bam (filesystem root); it is assumed
# the BAMs in the current directory are meant — adjust if they live elsewhere.
# NOTE(review): -f counts per feature (exon) instead of per gene; confirm that
# is intended.
featureCounts -T 4 -f -t exon -g gene_id -a ****.gtf -o all_id.count ./*.bam