chr長度文件
# Keep only the chromosome-level records (lines starting with "Chr").
# The filter goes through a temporary file: redirecting straight back onto
# the input truncates it before it can be read. If grep matches nothing the
# original file is left untouched.
grep '^Chr' Cbre.chr.gff3 > Cbre.chr.gff3.tmp \
  && mv -- Cbre.chr.gff3.tmp Cbre.chr.gff3
# Unique sequence IDs from column 1. awk's default field splitting is used
# because GFF3 is tab-delimited; `cut -d ' '` passes lines without a space
# through unchanged.
awk '{print $1}' Cbre.chr.gff3 | sort -u > id
# Pull the matching sequences out of the full assembly.
seqkit grep -f id ../genome.toplevel.fa > Cbre.chr.fna
# Write "<col2> TAB 1 TAB <col1>" per sequence
# (fastalength output is presumably "<length> <id>" -- TODO confirm).
fastalength Cbre.chr.fna | awk '{print $2"\t1\t"$1}' > Chrlength.txt
gc含量
# Build the window file: fixed 50 kb windows along every sequence listed in
# genome.len.
# NOTE(review): genome.len is not produced by any command visible here --
# confirm it describes the same sequences as Cbre.chr.fna.
bedtools makewindows -w 50000 -g genome.len > genome.window.bed
# Mean GC content of every window.
seqtk subseq Cbre.chr.fna genome.window.bed > genome.window.fasta
# Columns 3-6 of `seqtk comp` are taken as the A/C/G/T counts (presumably in
# that order -- TODO confirm), so GC = ($4+$5)/($3+$4+$5+$6).
# A window with no A/C/G/T at all (e.g. all N) gets GC 0 instead of aborting
# awk with a division by zero, which would silently truncate the output.
# The window name "<chr>:<start>-<end>" is split on its own, so the GC value
# can never be mis-parsed and no trailing empty column is written.
seqtk comp genome.window.fasta \
  | awk 'BEGIN { OFS = "\t" }
    {
      total = $3 + $4 + $5 + $6
      gc = (total > 0) ? ($4 + $5) / total : 0
      split($1, win, /[:-]/)
      print win[1], win[2], win[3], gc
    }' > sind_gc.txt
計算每個窗口基因條數
# Convert the annotation GFF3 to BED; convert2bed is run from the py_36
# conda environment.
conda activate py_36
convert2bed -i gff < Cbre.chr.gff3 > Cbre.bed
# The GFF3 also carries mRNA/exon/CDS records for each gene, so counting
# every record would inflate the per-window gene count several-fold.
# Build a gene-only BED (GFF3 column 3 is the feature type) for the count.
awk -F '\t' '$3 == "gene"' Cbre.chr.gff3 | convert2bed -i gff > Cbre.gene.bed
# -c: number of overlapping genes per window; -F 0.1: at least 10% of the
# gene must lie inside the window for it to count.
bedtools intersect -a genome.window.bed -b Cbre.gene.bed -c -F 0.1 > sind_genecount.txt
track1,基因在染色體上面的正負鏈
# Emit "<seqid> <start> <end> <value>" for every GFF3 record, where the
# strand column is mapped "+" -> 0.5 and "-" -> -0.5 (any other strand value
# is passed through unchanged). The mapping is applied to the strand field
# only, so a "+" or "-" elsewhere on the line (e.g. in a sequence name) is
# never rewritten.
# NOTE(review): all feature types are written, not only gene records.
awk 'BEGIN { OFS = "\t" }
  {
    strand = $7
    if (strand == "+") {
      strand = "0.5"
    } else if (strand == "-") {
      strand = "-0.5"
    }
    print $1, $4, $5, strand
  }' Cbre.chr.gff3 > track.txt
突出顯示的基因位置
# Positions of the genes to highlight, labelled "SUS".
# -F -w: treat each line of SUS.id as a literal, whole-word ID, so an ID can
# neither act as a regex nor match as a prefix/substring of a longer ID.
# The feature type is tested on column 3 rather than grepping for "gene"
# anywhere on the line.
grep -F -w -f SUS.id Cbre.chr.gff3 \
  | sort \
  | awk '$3 == "gene" {print $1"\t"$4"\t"$5"\tSUS"}' > label.txt
重復序列含量
# Convert the RepeatMasker-style .out file to GFF3.
# The output is overwritten (">"), not appended: with ">>" every re-run
# would add a second copy of all records to Cbre.repeat.gff.
/home/lx_sky6/software/RepeatMasker/util/rmOutToGFF3.pl \
  /home/lx_sky6/yt/0729_Carex/7-repeat_toplevel/2-EDTA/genome.Carex_breviculmis.toplevel.fa.mod.EDTA.final/genome.Carex_breviculmis.toplevel.fa.mod.EDTA.intact.fa.out \
  > Cbre.repeat.gff
# Per-window repeat coverage: keep the window coordinates and column 7 of the
# bedtools coverage output (presumably the covered fraction of the window --
# TODO confirm against the bedtools version in use).
bedtools coverage -a genome.window.bed -b Cbre.repeat.gff \
  | awk '{print $1 "\t" $2 "\t" $3 "\t" $7}' > Cbre_repeat.txt
link
# Prepare the two (identical) inputs for the self-vs-self synteny search.
conda activate jcvi
for side in Cbre1 Cbre2; do
  # Annotation -> BED, then jcvi's `bed uniq` pass on it.
  python3 -m jcvi.formats.gff bed Cbre.chr.gff3 -o "${side}.bed"
  python3 -m jcvi.formats.bed uniq "${side}.bed"
  # BED column 4 holds the ID without the ".t1" suffix that the CDS records
  # carry, so the suffix is appended before looking the sequences up.
  # Each side gets its own CDS file: previously the Cbre1 extraction was only
  # printed to stdout, so Cbre1.cds was never created.
  awk '{print $4".t1"}' "${side}.bed" \
    | seqkit grep -f - ../21-Collinearity/Cbre.chr.cds > "${side}.cds"
done
##這裡由於gff3中gene的id(即bed文件第4列)是不帶.t1的,而cds和pep的id是帶.t1的,所以要加上.t1才能提取出來。
##下面是gff3
Chr1 EVM gene 34110 34304 . - . ID=evm00001;Name=evm00001
Chr1 EVM mRNA 34110 34304 . - . ID=evm00001.t1;Parent=evm00001
Chr1 EVM exon 34110 34304 . - . ID=evm00001.t1.exon1;Parent=evm00001.t1
Chr1 EVM CDS 34110 34304 . - 0 ID=evm00001.t1.CDS1;Parent=evm00001.t1
##為了與生成的bed文件保持一致,提取出來的cds的id仍然要去掉.t1
# Strip the ".t1" suffix from the CDS record names so they agree with the IDs
# in the BED files. The dot is escaped (an unescaped "." matches any
# character) and the edit is limited to FASTA header lines, so sequence lines
# are never touched.
sed -i '/^>/ s/\.t1//g' Cbre1.cds Cbre2.cds
# Self-vs-self ortholog/synteny search; reads Cbre1.{bed,cds} and
# Cbre2.{bed,cds} from the current directory.
python -m jcvi.compara.catalog ortholog --no_strip_names Cbre1 Cbre2
# Screen the anchors with --minspan=30 and request the .simple block summary.
python -m jcvi.compara.synteny screen --minspan=30 --simple Cbre1.Cbre2.anchors Cbre1.Cbre2.anchors.new
# Convert the .simple blocks to link records with the local helper script,
# then give the result its final name.
python /home/lx_sky6/software/miniconda3/envs/jcvi/simple2links.py Cbre1.Cbre2.anchors.simple
mv Cbre1.Cbre2.anchors.simple_link.txt sind_link.txt