$ mkdir ref #新建reference文件夾
xingkx 17:41:23 ~
$ cd ref
xingkx 17:41:31 ~/ref
$ mkdir mm9
xingkx 17:41:48 ~/ref
$ cd mm9
xingkx 17:41:54 ~/ref/mm9
$ wget --timestamping \
'ftp://hgdownload.cse.ucsc.edu/goldenPath/mm9/chromosomes/*'
--2023-04-03 17:41:57-- ftp://hgdownload.cse.ucsc.edu/goldenPath/mm9/chromosomes/*
=> ‘.listing’
Resolving hgdownload.cse.ucsc.edu (hgdownload.cse.ucsc.edu)... 128.114.119.163
Connecting to hgdownload.cse.ucsc.edu (hgdownload.cse.ucsc.edu)|128.114.119.163|:21... ^C
xingkx 17:42:26 ~/ref/mm9
試圖通過網頁鏈接下載小鼠mm9參考基因組失敗，于是手動下載。
不要random文件，要下載md5文件
$ cd ..
xingkx 17:46:30 ~/ref
$ vim md5.txt
xingkx 17:49:04 ~/ref
$ cat md5.txt #查看md5文件，包含了要檢測的文件
79d6d3a0198d6dccaa6c2af23ec9be00 chr1.fa.gz
24893d2118e606d341f3a91836f6267d chr10.fa.gz
f01fd408af17f393720044a44b3c4ee0 chr11.fa.gz
d882d9ba5696b6e6fdd80179670bc463 chr12.fa.gz
d400282edeb4c0529d5196019372d3c1 chr13.fa.gz
e4735f6e161a9498c2487958a6153891 chr13_random.fa.gz
1797fcbad80d0794b598d6fb6053e299 chr14.fa.gz
f182f5e0e3e5f90b3775b6e32274319b chr15.fa.gz
ef9806225139e113257e0e20717e2285 chr16.fa.gz
46cc30a95e412eb0320255335a42863f chr16_random.fa.gz
57e5044feac506810ed03d5bab6bb231 chr17.fa.gz
e16f391324164d5499fc48c05ef529e8 chr17_random.fa.gz
d409da85d239f0d8b1bfaa3a494be121 chr18.fa.gz
94ad4a7ceb60ad29feadb3d8aa2a925a chr19.fa.gz
c06a6759bd5f7a14c7fe4aa0a6423e7e chr1_random.fa.gz
14eaca978b51529479b5497634bf90ce chr2.fa.gz
180b4b2179fd550a39d1cd0b841e4834 chr3.fa.gz
ddd635cdcebd2c6666b4dadbf1c52fd2 chr3_random.fa.gz
c86d544389b3c77b54070d8d041fbc19 chr4.fa.gz
de2a1bb1505fd53cf2b9849d3a16358d chr4_random.fa.gz
d0d1e3c1a3914c1d2ec807330ee4d474 chr5.fa.gz
2e6a7618b193c877b9534101136bde47 chr5_random.fa.gz
4276c21b867322940abfccfd98e9c864 chr6.fa.gz
6e609fe100868e5b3051b9801b9efcf8 chr7.fa.gz
1f74d499fd66fbc3d2b239b4076e2177 chr7_random.fa.gz
e9b1b3ce3a1ce0a540bfb7eb2af44f59 chr8.fa.gz
0fad939822aab8a83fd9ef155bd0b52a chr8_random.fa.gz
0b2c504c618a5dd04619bd82171cc6dd chr9.fa.gz
4e30bb88d367d149a0aa050cc4e423cd chr9_random.fa.gz
ea02ff9a4831504423d63f26a329b74e chrM.fa.gz
e56577087f9765e2ecdf9d605ad2f3b8 chrUn_random.fa.gz
28b4c65ff0bf555bdf305d9b629794b4 chrX.fa.gz
a3c21a22e22d8fb4af962195e8b81e22 chrX_random.fa.gz
39079a4b2762f2237eca347d52c11215 chrY.fa.gz
5e013cab47168d75e96bdfc123771d54 chrY_random.fa.gz
xingkx 17:50:58 ~/ref
$ mv md5.txt mm9
xingkx 17:57:28 ~/ref
$ ls
mm9
xingkx 17:57:35 ~/ref
$ cd mm9
xingkx 17:58:03 ~/ref/mm9
$ ls
chr10.fa.gz chr14.fa.gz chr18.fa.gz chr3.fa.gz chr7.fa.gz chrX.fa.gz
chr11.fa.gz chr15.fa.gz chr19.fa.gz chr4.fa.gz chr8.fa.gz chrY.fa.gz
chr12.fa.gz chr16.fa.gz chr1.fa.gz chr5.fa.gz chr9.fa.gz md5.txt
chr13.fa.gz chr17.fa.gz chr2.fa.gz chr6.fa.gz chrM.fa.gz
xingkx 17:59:56 ~/ref/mm9
$ md5sum -c md5.txt #檢測下載數據的md5值
chr1.fa.gz: OK
chr10.fa.gz: OK
chr11.fa.gz: OK
chr12.fa.gz: OK
chr13.fa.gz: OK
md5sum: chr13_random.fa.gz: No such file or directory
chr13_random.fa.gz: FAILED open or read
chr14.fa.gz: OK
chr15.fa.gz: OK
chr16.fa.gz: OK
md5sum: chr16_random.fa.gz: No such file or directory
chr16_random.fa.gz: FAILED open or read
chr17.fa.gz: OK
md5sum: chr17_random.fa.gz: No such file or directory
chr17_random.fa.gz: FAILED open or read
chr18.fa.gz: OK
chr19.fa.gz: OK
md5sum: chr1_random.fa.gz: No such file or directory
chr1_random.fa.gz: FAILED open or read
chr2.fa.gz: OK
chr3.fa.gz: OK
md5sum: chr3_random.fa.gz: No such file or directory
chr3_random.fa.gz: FAILED open or read
chr4.fa.gz: OK
md5sum: chr4_random.fa.gz: No such file or directory
chr4_random.fa.gz: FAILED open or read
chr5.fa.gz: OK
md5sum: chr5_random.fa.gz: No such file or directory
chr5_random.fa.gz: FAILED open or read
chr6.fa.gz: OK
chr7.fa.gz: OK
md5sum: chr7_random.fa.gz: No such file or directory
chr7_random.fa.gz: FAILED open or read
chr8.fa.gz: OK
md5sum: chr8_random.fa.gz: No such file or directory
chr8_random.fa.gz: FAILED open or read
chr9.fa.gz: OK
md5sum: chr9_random.fa.gz: No such file or directory
chr9_random.fa.gz: FAILED open or read
chrM.fa.gz: OK
md5sum: chrUn_random.fa.gz: No such file or directory
chrUn_random.fa.gz: FAILED open or read
chrX.fa.gz: OK
md5sum: chrX_random.fa.gz: No such file or directory
chrX_random.fa.gz: FAILED open or read
chrY.fa.gz: OK
md5sum: chrY_random.fa.gz: No such file or directory
chrY_random.fa.gz: FAILED open or read
md5sum: WARNING: 13 listed files could not be read
xingkx 18:00:31 ~/ref/mm9
$ gunzip *gz #解壓當前目錄下的所有gz文件
xingkx 18:01:22 ~/ref/mm9
$ ls #解壓成功,全部以fa結尾
chr10.fa chr13.fa chr16.fa chr19.fa chr3.fa chr6.fa chr9.fa chrY.fa
chr11.fa chr14.fa chr17.fa chr1.fa chr4.fa chr7.fa chrM.fa md5.txt
chr12.fa chr15.fa chr18.fa chr2.fa chr5.fa chr8.fa chrX.fa
xingkx 18:01:58 ~/ref/mm9
$ cat *fa > mm9.fa #將所有fa文件重定向到mm9.fa,拼接起來
xingkx 18:02:39 ~/ref/mm9
$ ls #重定向后，原來的文件依然存在
chr10.fa chr14.fa chr18.fa chr3.fa chr7.fa chrX.fa mm9.fa.fai
chr11.fa chr15.fa chr19.fa chr4.fa chr8.fa chrY.fa nohup.out
chr12.fa chr16.fa chr1.fa chr5.fa chr9.fa md5.txt
chr13.fa chr17.fa chr2.fa chr6.fa chrM.fa mm9.fa
(chipseq) xingkx 19:21:38 ~/ref/mm9
激活chipseq環境，使用samtools工具
conda activate chipseq
samtools
samtools faidx mm9.fa
ls
$ cat mm9.fa.fai
chr10 129993255 7 50 51
chr11 121843856 132593135 50 51
chr12 121257530 256873876 50 51
chr13 120284312 380556564 50 51
chr14 125194864 503246570 50 51
chr15 103494974 630945339 50 51
chr16 98319150 736510220 50 51
chr17 95272651 836795760 50 51
chr18 90772031 933973872 50 51
chr19 61342430 1026561351 50 51
chr1 197195432 1089130636 50 51
chr2 181748087 1290269983 50 51
chr3 159599783 1475653038 50 51
chr4 155630120 1638444823 50 51
chr5 152537259 1797187552 50 51
chr6 149517037 1952775563 50 51
chr7 152524553 2105282947 50 51
chr8 131738871 2260857998 50 51
chr9 124076172 2395231653 50 51
chrM 16299 2521789355 50 51
chrX 166650296 2521805986 50 51
chrY 15902555 2691789294 50 51
(chipseq) xingkx 19:28:35 ~/ref/mm9
使用bowtie2比對工具,bowtie2-build構建索引
345 bowtie
346 bowtie2
347 bowtie2-build -h
348 nohup bowtie2-build --threads 4 mm9.fa mm9 & #線程數是4,前綴是mm9
表示任務跑完了 1.bg 2.top
bg命令來自于英文單詞background的縮寫，中文譯為“背景、后臺”，其功能是用于將作業放到后臺運行。在Linux系統終端中執行命令時，如遇到備份、打包、下載等長時間的任務，就會很長一段時間占用寶貴的終端執行界面，無法執行其他任務。而bg命令則可以將指定的命令任務放到系統后臺去執行，使得終端界面可以繼續其他工作，效果等同于“命令 &”的執行效果。
$ ls
chr10.fa chr15.fa chr1.fa chr6.fa chrX.fa mm9.3.bt2 mm9.rev.2.bt2
chr11.fa chr16.fa chr2.fa chr7.fa chrY.fa mm9.4.bt2 nohup.out
chr12.fa chr17.fa chr3.fa chr8.fa md5.txt mm9.fa
chr13.fa chr18.fa chr4.fa chr9.fa mm9.1.bt2 mm9.fa.fai
chr14.fa chr19.fa chr5.fa chrM.fa mm9.2.bt2 mm9.rev.1.bt2
xingkx 09:24:49 ~/ref/mm9
bt2結尾的文件，索引構建成功
$ cd clean #使用剪切過后的文件
(chipseq) xingkx 22:58:19 ~/chipseq/clean
$ ls
fastqc SRR391033.fastq.gz_trimming_report.txt SRR391033_trimmed.fq.gz
(chipseq) xingkx 22:58:21 ~/chipseq/clean
$ bowtie2 -p 20 -x ../../ref/mm9/mm9 -U SRR391033_trimmed.fq.gz -b ../align/sRR391033.bam
# 最后一個mm9指前綴
Warning: Output file '../align/sRR391033.bam' was specified without -S. This will not work in future Bowtie 2 versions. Please use -S instead.
0 reads
0.00% overall alignment rate
(chipseq) xingkx 23:03:40 ~/chipseq/clean
$ bowtie2 -p 20 -x ../../ref/mm9/mm9 -U SRR391033_trimmed.fq.gz -S ../align/SRR391033.sam
#bam文件不行,使用sam文件
Error while flushing and closing output
terminate called after throwing an instance of 'int'
Aborted (core dumped)
(ERR): bowtie2-align exited with value 134
(chipseq) xingkx 23:05:21 ~/chipseq/clean
假設你發現前臺運行的一個程序需要很長的時間,但是需要干其他的事情,你就可以用 Ctrl-Z 暫停這個程序,然后可以看到系統提示:
[1]+ Stopped /root/bin/rsync.sh
然后我們可以把程序調度到后臺執行:(bg 后面的數字為作業號)
#bg 1
[1]+ /root/bin/rsync.sh &
用 jobs 命令查看正在運行的任務:
#jobs
[1]+ Running /root/bin/rsync.sh &
如果想把它調回到前臺運行,可以用
#fg 1
/root/bin/rsync.sh
這樣,你在控制臺上就只能等待這個任務完成了.
& 將指令丟到后臺中去執行
[ctrl]+z 將前臺任務丟到后臺中暫停
jobs 查看后臺的工作狀態
fg %jobnumber 將后臺的任務拿到前臺來處理
bg %jobnumber 將任務放到后臺中去處理
kill 管理后臺的任務
命令運行時使用CTRL+Z，強制當前進程轉為后臺，并使之停止。
ctrl+z 加 bg 效果等同于“命令 &”；nohup 則額外忽略掛斷信號，使任務在退出終端后仍能繼續運行
存儲空間問題，home下你個人就6G的空間，mnt下你應該會有幾T的空間,限制不一樣的，把chipseq 文件夾移動到/mnt,然后重新跑任務
cp -r /home/xingkx/chipseq /mnt/disk4/xingkx
$ conda create -n chipseq2 python=3.9
Retrieving notices: ...working... done
Collecting package metadata (current_repodata.json): failed
CondaHTTPError: HTTP 000 CONNECTION FAILED for url <https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/linux-64/current_repodata.json>
Elapsed: -
An HTTP error occurred when trying to retrieve this URL.
HTTP errors are often intermittent, and a simple retry will get you on your way.
'https//mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/linux-64'
(chipseq) xingkx 22:44:00 /mnt/disk4/xingkx/chipseq/align
bowtie2 -p 1 -x /home/xingkx/ref/mm9/mm9 -U SRR391033_trimmed.fq.gz -S /home/xingkx/chipseq/align/test.sam
線程數改小，移動到home目錄下跑
查看sam文件的兩種方式:
less SRR391033.sam
samtools view SRR391033.sam |less
sam文件轉bam文件,使用samtools工具,bam 文件是排過序的
samtools sort -o SRR391033.sort.bam -O BAM -@ 4 SRR391033.sam
查看bam 文件:
less SRR391033.sort.bam
samtools view SRR391033.sort.bam |less
bam文件各列的含義:
https://www.cnblogs.com/xudongliang/p/5437850.html
使用macs2 call peak
macs2 callpeak -t SRR391033.sort.bam -g mm --outdir ../peaks/ -n SRR391033 -q 0.05
這里有p value,q value的概念，多重校正，q是校正過后的p值
報錯
python 版本是2.7,改成3
python -V
pip install numpy -U #Python使用pip安裝Numpy模塊
conda create -n chipseq2 python=3.9
一直因為網絡問題報錯 服務器沒聯網