以RNA轉(zhuǎn)錄組為例:
#!/bin/bash
threads=16
#對于普玉
rawdata_in=/sharedata/home/likun/zengjigang/project/rnaseq/results/02_fastp
fastqc_out=/sharedata/home/likun/zengjigang/project/rnaseq/results/01_fastqc
hisat2_genome=/sharedata/home/likun/zengjigang/project/rnaseq/results/04_genomedata/genome_tran
hisat2_sam=/sharedata/home/likun/zengjigang/project/rnaseq/results/05_hisat2
sam_view=/sharedata/home/likun/zengjigang/project/rnaseq/results/06_samtools/umsorted
bam_sort=/sharedata/home/likun/zengjigang/project/rnaseq/results/06_samtools/sorted
sort_flagstat=/sharedata/home/likun/zengjigang/project/rnaseq/results/06_samtools/flagstat
gtf_path=/sharedata/home/likun/zengjigang/project/rnaseq/raw_data/reference/b73v4.gtf
featurecounts_out=/sharedata/home/likun/zengjigang/project/swrnaseq/results/09_featureCount
multiqc_out=/sharedata/home/likun/zengjigang/project/swrnaseq/results/10_multiqc
#--------------------------------------------------------------------------------------------------
#構(gòu)造命令的函數(shù)
function rnaseq() #和子進程中的命令相對應(yīng)
{
#fastqc -t ${threads} -q -o ${fastqc_out} \
# ${rawdata_in}/${name}_1.fastq.gz \
# ${rawdata_in}/${name}_2.fastq.gz \
# &&
hisat2 -p ${threads} --dta -x ${hisat2_genome} \
-1 ${rawdata_in}/${name}_1.fastq.gz \
-2 ${rawdata_in}/${name}_2.fastq.gz \
-S /sharedata/home/likun/zengjigang/project/rnaseq/results/05_hisat2/${name}.sam &&
samtools view -@ ${threads} -bS -1 /sharedata/home/likun/zengjigang/project/rnaseq/results/05_hisat2/${name}.sam > \
${sam_view}/${name}.unsorted.bam &&
samtools sort -@ ${threads} -m 800M -o ${bam_sort}/${name}.sort.bam \
${sam_view}/${name}.unsorted.bam &&
samtools flagstat -@ ${threads} ${bam_sort}/${name}.sort.bam > ${sort_flagstat}/${name}.sort.bam.flagstat
}
# --------------------------------------------------------------------------------------------------
# 創(chuàng)建一個管道
mkfifo mylist
# 給子進程管道綁定文件描述符4
exec 4<>mylist
# 再創(chuàng)建一個管道(鎖文件)腕让,用于解決線程安全問題纯丸,和唯一子進程配對
mkfifo mylock
# 綁定文件描述符6
exec 6<>mylock
# 事先向鎖文件中插入1條數(shù)據(jù)(解鎖)
echo >mylock
# 開啟5個子進程
for ((i=1; i <= 5; i++)); do
# 這里的 & 會開啟一個子進程執(zhí)行
{
# 先讀取鎖文件(加鎖)觉鼻,由于鎖文件中只有1條數(shù)據(jù)队橙,讀取完之后鎖文件空了其他子進程再讀取時只能等待
while read -t 1 < mylock && read -t 1 name <mylist; do
# 讀取到業(yè)務(wù)數(shù)據(jù)后立即向?qū)懭?條數(shù)據(jù)到鎖文件(解鎖),讓其他子進程繼續(xù)讀取數(shù)據(jù)
echo >mylock
#以下是子進程拿著for變量執(zhí)行命令仇矾,這時候子進程還沒還牌子
name=`basename ${name/_1.fastq.gz}` #basename是去掉路徑前綴只保留文件名前綴贮匕,“/”表示去掉變量截取的后綴
echo "RNA_seq start ${name}" #echo表示可視化輸出命令結(jié)果是什么樣
rnaseq ${name} #表示函數(shù)名和它一樣
done
} &
done
# for進行的循環(huán)收到子進程的牌子花枫,并將其處理的變量全部拿給管道
for name in `ls /sharedata/home/likun/zengjigang/project/rnaseq/results/02_fastp/*_1.fastq.gz`; do
echo ${name} > mylist
done
# 使用 wait 命令阻塞當前進程,直到所有子進程全部執(zhí)行完
wait
echo "Finish all loops!!!"
# 全部結(jié)束后解綁文件描述符并刪除管道
exec 4<&-
exec 4>&-
rm -f mylist
exec 6<&-
exec 6>&-
rm -f mylock
#--------------------------------------------------------------
# 運行featureCounts對所有樣本進行基因水平定量
ls ${bam_sort}/*.sort.bam | do
featureCounts -T ${threads} \
-p \
-t exon \
-g gene_id \
-a ${gtf_path} \
-o ${featurecounts_out}/sw_counts.280.txt \
${bam_sort}/*.sort.bam
multiqc ${featurecounts_out}/sw_counts.280.txt.summary -o ${multiqc_out}/sw_counts.280.summary
multiqc ${sort_flagstat}/${name}.sort.bam.flagstat -o ${multiqc_out}/bam_sort_flagstat
#echo "Finish all analysis!!!"