ATAC-seq Snakemake 流程

參考1
參考2

Understand the method

snakemake file

shell.executable("/bin/bash")
configfile: "./config.yaml"


(SAMPLES,READS,) = glob_wildcards("01fastq/{sample}_{read}.fastq.gz")
READS=["1","2"]

rule all:
    input:
        expand("06peak_macs2/{sample}_macs2_peaks.broadPeak", sample=SAMPLES)

rule fastqc:
    input:  "01fastq/{sample}_1.fastq.gz", "01fastq/{sample}_2.fastq.gz"
    output: "02fqc/{sample}_1_fastqc.zip", "02fqc/{sample}_2_fastqc.zip"
    log:    "00log/{sample}_fastqc"
    threads: 2
    params : jobname = "{sample}"
    message: "fastqc {input}: {threads}"
    shell:
        """
    # fastqc works fine on .gz file as well
        module load fastqc
        fastqc -o 02fqc {input[0]} 2> {log}
        fastqc -o 02fqc {input[1]} 2> {log}
        """

rule fastp:
    input:
        fwd="01fastq/{sample}_1.fastq.gz",
        rev="01fastq/{sample}_2.fastq.gz"
    output:
        fwd="03trim/{sample}_1_good.fq.gz",
        rev="03trim/{sample}_2_good.fq.gz",
        html="03trim/{sample}.html",
        json="03trim/{sample}.json"
    log:
        "00log/{sample}_fastp"
    message: "trim_adaptor {input}"
    shell:
        """
        ml fastp
        fastp -w 10 -h {output[html]} -j  {output[json]} -i {input[fwd]} -I {input[rev]} -o {output[fwd]} -O {output[rev]} 2> {log}
        """

## the later step will remove chrM from the bam file and coordinate sort the bam
## so I did not cooridnate sort the bam at this step to save some time.
rule align:
    input: "03trim/{sample}_1_good.fq.gz", "03trim/{sample}_2_good.fq.gz"
    output: "04aln/{sample}.sorted.bam", "00log/{sample}.align"
    threads: 10
    params: jobname = "{sample}"
    message: "aligning {input}: {threads} threads"
    log:
        bowtie2 = "00log/{sample}.align",
        markdup = "00log/{sample}.markdup"
    shell:
        """
        ## samblaster mark duplicates for read id grouped reads. I do not coordinate sort the bam
        ml bowtie/2 samblaster samtools
        bowtie2 --threads 10 --very-sensitive -x {config[idx_bt2]} -1 {input[0]} -2 {input[1]} 2> {log.bowtie2} \
        | samblaster 2> {log.markdup} \
        | samtools view -@ 10 -Sb - > {output[0]}
        """

# check number of reads mapped by samtools flagstat
rule flagstat_bam:
    input:  "04aln/{sample}.sorted.bam"
    output: "04aln/{sample}.sorted.bam.flagstat"
    log:    "00log/{sample}.flagstat_bam"
    threads: 2
    params: jobname = "{sample}"
    message: "flagstat_bam {input}: {threads} threads"
    shell:
        """
        ml samtools
        samtools flagstat {input} > {output} 2> {log}
        """


## shifting the reads are only critical for TF footprint, for peak calling and making bigwigs, it should be fine using the bams without shifting
# https://sites.google.com/site/atacseqpublic/atac-seq-analysis-methods/offsetmethods
rule remove_chrM_bam:
    input: "04aln/{sample}.sorted.bam"
    output: "05aln_exclude_chrM/{sample}_exclude_chrM.sorted.bam", "05aln_exclude_chrM/{sample}_exclude_chrM.sorted.bam.bai"
    log: "00log/{sample}_exclude_chrM_bam.log"
    threads: 10
    message: "excluding chrM from bam {input} : {threads} threads"
    params: jobname = "{sample}"
    shell:
        """
        ml samtools samblaster
        # remove duplicates and reads on chrM, coordinate sort the bam
        # samblaster expects name sorted bamq
        samtools view -h {input} | samblaster --removeDups \
        | grep -v -P '\tchrM\t' \
        | samtools view -Sb -F 4 - \
        | samtools sort -m 15G -@ 10 -T {input}.tmp -o {output[0]}
        samtools index {output[0]}
        """

rule flagstat_chrM_exclude_bam:
    input:  "05aln_exclude_chrM/{sample}_exclude_chrM.sorted.bam"
    output: "05aln_exclude_chrM/{sample}_exclude_chrM.sorted.bam.flagstat"
    log:    "00log/{sample}_exclude_chrM_flagstat_bam"
    threads: 5
    params: jobname = "{sample}"
    message: "flagstat_bam {input}: {threads} threads"
    shell:
        """
        ml samtools
        samtools flagstat {input} -@ 5 > {output} 2> {log}
        """

# https://github.com/taoliu/MACS/issues/145
rule call_peaks_macs2:
    input: "05aln_exclude_chrM/{sample}_exclude_chrM.sorted.bam", "05aln_exclude_chrM/{sample}_exclude_chrM.sorted.bam.bai"
    output: bed = "06peak_macs2/{sample}_macs2_peaks.broadPeak"
    log: "00log/{sample}_call_peaks_macs2.log"
    params:
        name = "{sample}_macs2",
        jobname = "{sample}"
    message: "call_peaks macs2 {input}: {threads} threads"
    shell:
        """
       ml macs
       ## for macs2, when nomodel is set, --extsize is default to 200bp, this is the same as 2 * shift-size in macs14.
        macs2 callpeak -t {input[0]} \
            --keep-dup all -f BAMPE -g {config[macs2_g]} -B \
            --outdir 06peak_macs2 -n {params.name} -p {config[macs2_pvalue]} \
            --broad --broad-cutoff {config[macs2_pvalue_broad]} &> {log}
        """

config file

# bowtie1 index
idx_bt2: ~/index/bowtie2/humanized/ucsc

# genome size for bedtools slop
genome_size: ~/index/ori_genomic/humanized_mice/ucsc/hg38.chrom.sizes.human.mm10.chrom.sizes.mouse

## genome fasta for nucleoATAC
genome_fasta: ~/index/ori_genomic/humanized_mice/ucsc/hg38_human_mm10_mouse.fa

macs2_g: 4.57e+09#for human just "hs", check the help 
macs2_pvalue: 1e-5

macs2_pvalue_broad: 1e-5

遞交文件

#!/bin/bash
#SBATCH --job-name=sbatch_ATAC_seq_flow
#SBATCH --time=24:00:00
#SBATCH --cpus-per-task=40
#SBATCH --mem=60g
module load snakemake singularity
ml python/3.6
snakemake  -j 40 --use-singularity 
最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
  • 序言:七十年代末安拟,一起剝皮案震驚了整個濱河市盛末,隨后出現(xiàn)的幾起案子凳厢,更是在濱河造成了極大的恐慌,老刑警劉巖料身,帶你破解...
    沈念sama閱讀 216,372評論 6 498
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件,死亡現(xiàn)場離奇詭異衩茸,居然都是意外死亡芹血,警方通過查閱死者的電腦和手機,發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 92,368評論 3 392
  • 文/潘曉璐 我一進店門,熙熙樓的掌柜王于貴愁眉苦臉地迎上來幔烛,“玉大人啃擦,你說我怎么就攤上這事《鲂” “怎么了令蛉?”我有些...
    開封第一講書人閱讀 162,415評論 0 353
  • 文/不壞的土叔 我叫張陵,是天一觀的道長乡恕。 經(jīng)常有香客問我言询,道長,這世上最難降的妖魔是什么傲宜? 我笑而不...
    開封第一講書人閱讀 58,157評論 1 292
  • 正文 為了忘掉前任运杭,我火速辦了婚禮,結(jié)果婚禮上函卒,老公的妹妹穿的比我還像新娘辆憔。我一直安慰自己,他們只是感情好报嵌,可當我...
    茶點故事閱讀 67,171評論 6 388
  • 文/花漫 我一把揭開白布虱咧。 她就那樣靜靜地躺著,像睡著了一般锚国。 火紅的嫁衣襯著肌膚如雪腕巡。 梳的紋絲不亂的頭發(fā)上,一...
    開封第一講書人閱讀 51,125評論 1 297
  • 那天血筑,我揣著相機與錄音绘沉,去河邊找鬼。 笑死豺总,一個胖子當著我的面吹牛车伞,可吹牛的內(nèi)容都是我干的。 我是一名探鬼主播喻喳,決...
    沈念sama閱讀 40,028評論 3 417
  • 文/蒼蘭香墨 我猛地睜開眼另玖,長吁一口氣:“原來是場噩夢啊……” “哼!你這毒婦竟也來了表伦?” 一聲冷哼從身側(cè)響起谦去,我...
    開封第一講書人閱讀 38,887評論 0 274
  • 序言:老撾萬榮一對情侶失蹤,失蹤者是張志新(化名)和其女友劉穎蹦哼,沒想到半個月后哪轿,有當?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體,經(jīng)...
    沈念sama閱讀 45,310評論 1 310
  • 正文 獨居荒郊野嶺守林人離奇死亡翔怎,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
    茶點故事閱讀 37,533評論 2 332
  • 正文 我和宋清朗相戀三年窃诉,在試婚紗的時候發(fā)現(xiàn)自己被綠了杨耙。 大學時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片。...
    茶點故事閱讀 39,690評論 1 348
  • 序言:一個原本活蹦亂跳的男人離奇死亡飘痛,死狀恐怖珊膜,靈堂內(nèi)的尸體忽然破棺而出,到底是詐尸還是另有隱情宣脉,我是刑警寧澤车柠,帶...
    沈念sama閱讀 35,411評論 5 343
  • 正文 年R本政府宣布,位于F島的核電站塑猖,受9級特大地震影響竹祷,放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜羊苟,卻給世界環(huán)境...
    茶點故事閱讀 41,004評論 3 325
  • 文/蒙蒙 一塑陵、第九天 我趴在偏房一處隱蔽的房頂上張望。 院中可真熱鬧蜡励,春花似錦令花、人聲如沸。這莊子的主人今日做“春日...
    開封第一講書人閱讀 31,659評論 0 22
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽。三九已至稽寒,卻和暖如春扮碧,著一層夾襖步出監(jiān)牢的瞬間,已是汗流浹背杏糙。 一陣腳步聲響...
    開封第一講書人閱讀 32,812評論 1 268
  • 我被黑心中介騙來泰國打工慎王, 沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留,地道東北人搔啊。 一個月前我還...
    沈念sama閱讀 47,693評論 2 368
  • 正文 我出身青樓,卻偏偏與公主長得像北戏,于是被迫代替她去往敵國和親负芋。 傳聞我的和親對象是個殘疾皇子,可洞房花燭夜當晚...
    茶點故事閱讀 44,577評論 2 353